
ENH - Add Pinball datafit #134

Merged
merged 31 commits on Dec 9, 2022

Commits (showing changes from 27 of 31 commits)
413ef54
remove sqrt n_samples
Badr-MOUFAD Nov 30, 2022
2ef5eb7
update unittest
Badr-MOUFAD Nov 30, 2022
5c0bedc
info comment statsmodels
Badr-MOUFAD Dec 1, 2022
ca6ece7
add prox subdiff to sqrt df
Badr-MOUFAD Dec 1, 2022
a6303e5
implement ``PDCD_WS``
Badr-MOUFAD Dec 1, 2022
e8fcee3
r sqrt_n from CB
Badr-MOUFAD Dec 1, 2022
339e98f
Merge branch 'r-sqrt-n' of https://github.com/Badr-MOUFAD/skglm into …
Badr-MOUFAD Dec 1, 2022
19a0ea9
bug w and subdiff
Badr-MOUFAD Dec 1, 2022
e01451d
unittest sqrt
Badr-MOUFAD Dec 1, 2022
dd36b88
add docs
Badr-MOUFAD Dec 1, 2022
523419b
fix docs SqrtQuadratic
Badr-MOUFAD Dec 1, 2022
71de179
Merge branch 'main' of https://github.com/scikit-learn-contrib/skglm …
Badr-MOUFAD Dec 2, 2022
63a547b
subdiff --> fixed_point
Badr-MOUFAD Dec 4, 2022
f78d17d
efficient prox conjugate && fix tests
Badr-MOUFAD Dec 5, 2022
d0ae3a4
remove go
Badr-MOUFAD Dec 5, 2022
ad36485
MM remarks
Badr-MOUFAD Dec 5, 2022
f60bd59
fix test && clean ups
Badr-MOUFAD Dec 5, 2022
5a5f1ba
MM round 2 remarks
Badr-MOUFAD Dec 5, 2022
4f27c56
CI Trigger
Badr-MOUFAD Dec 5, 2022
fe45faa
implement pinball
Badr-MOUFAD Dec 6, 2022
3ce886f
unittest
Badr-MOUFAD Dec 6, 2022
6928502
fix pinball value && ST step
Badr-MOUFAD Dec 6, 2022
1271288
more unittest
Badr-MOUFAD Dec 6, 2022
bd1984a
fix bug prox pinball
Badr-MOUFAD Dec 6, 2022
36100c7
Merge branch 'main' of https://github.com/scikit-learn-contrib/skglm …
Badr-MOUFAD Dec 8, 2022
1a03c60
MM remarks
Badr-MOUFAD Dec 8, 2022
4b3ea45
Update skglm/experimental/quantile_regression.py
mathurinm Dec 8, 2022
9cf2216
pinball expression
Badr-MOUFAD Dec 8, 2022
626b71d
Merge branch 'pinball-df' of https://github.com/Badr-MOUFAD/skglm int…
Badr-MOUFAD Dec 8, 2022
8e93720
sqrt --> pinball
Badr-MOUFAD Dec 8, 2022
0a247f0
quantile --> quantile_level
Badr-MOUFAD Dec 9, 2022
73 changes: 73 additions & 0 deletions skglm/experimental/quantile_regression.py
@@ -0,0 +1,73 @@
import numpy as np
from numba import float64
from skglm.datafits import BaseDatafit
from skglm.utils.prox_funcs import ST_vec


class Pinball(BaseDatafit):
    r"""Pinball datafit.

    The datafit reads::

        sum_i quantile * max(y_i - (Xw)_i, 0) + (1 - quantile) * max((Xw)_i - y_i, 0)
Collaborator:

nitpick: the real value does not involve np.max, it is np.maximum(...).sum().
Maybe rewrite as a sum and use _i to denote sample indices? Check how sklearn does it.
Collaborator Author:

I looked up the scikit-learn source code, but they don't specify the expression.

I agree with using a sum and _i to denote sample indices.


    with ``quantile`` in [0, 1].

    Parameters
    ----------
    quantile : float
        Quantile must be in [0, 1]. When ``quantile=0.5``,
        the datafit becomes a Least Absolute Deviation (LAD) datafit.
    """

    def __init__(self, quantile):
        self.quantile = quantile

    def value(self, y, w, Xw):
        # implementation taken from
        # github.com/benchopt/benchmark_quantile_regression/blob/main/objective.py
        quantile = self.quantile

        residual = y - Xw
        sign = residual >= 0
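        # note: ``sign`` is a 0/1 indicator of nonnegative residuals, not a +/-1 sign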

        loss = quantile * sign * residual - (1 - quantile) * (1 - sign) * residual
        return np.sum(loss)

    def prox(self, w, step, y):
        """Prox of ``step * pinball``."""
        shift_cst = (self.quantile - 1/2) * step
        return y - ST_vec(y - w - shift_cst, step / 2)

    def prox_conjugate(self, z, step, y):
        """Prox of conjugate of ``step * pinball``."""
        # using Moreau decomposition
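        # prox_{step * f^*}(z) = z - step * prox_{f / step}(z / step)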
        inv_step = 1 / step
        return z - step * self.prox(inv_step * z, inv_step, y)

    def subdiff_distance(self, Xw, z, y):
        """Distance of ``z`` to subdiff of pinball at ``Xw``."""
        # computation note: \partial [pinball(y - .)](Xw) = -\partial pinball(y - Xw)
        y_minus_Xw = y - Xw
        shift_cst = self.quantile - 1/2

        max_distance = 0.
        for i in range(len(y)):

            if y_minus_Xw[i] == 0.:
                distance_i = max(0, abs(z[i] - shift_cst) - 1)
            else:
                distance_i = abs(z[i] + shift_cst + np.sign(y_minus_Xw[i]))

            max_distance = max(max_distance, distance_i)

        return max_distance

    def get_spec(self):
        spec = (
            ('quantile', float64),
        )
        return spec

    def params_to_dict(self):
        return dict(quantile=self.quantile)
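As a quick sanity check of the closed-form prox above (not part of this diff), Pinball.prox can be compared against a brute-force per-coordinate minimization of ``step * pinball_i(u) + (u - w_i)^2 / 2``; a minimal sketch, assuming scipy is available:

import numpy as np
from scipy.optimize import minimize_scalar
from skglm.experimental.quantile_regression import Pinball

rng = np.random.default_rng(42)
y, w = rng.normal(size=5), rng.normal(size=5)
step, quantile = 0.7, 0.3

prox = Pinball(quantile).prox(w, step, y)

for i in range(len(y)):
    # per-coordinate objective: step * pinball_i(u) + (u - w_i)^2 / 2
    def objective(u, i=i):
        residual = y[i] - u
        pinball_i = quantile * max(residual, 0) + (1 - quantile) * max(-residual, 0)
        return step * pinball_i + 0.5 * (u - w[i]) ** 2

    np.testing.assert_allclose(prox[i], minimize_scalar(objective).x, atol=1e-4)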
38 changes: 38 additions & 0 deletions skglm/experimental/tests/test_quantile_regression.py
@@ -0,0 +1,38 @@
import pytest
import numpy as np
from numpy.linalg import norm

from skglm.penalties import L1
from skglm.experimental.pdcd_ws import PDCD_WS
from skglm.experimental.quantile_regression import Pinball

from skglm.utils.data import make_correlated_data
from sklearn.linear_model import QuantileRegressor


@pytest.mark.parametrize('quantile', [0.3, 0.5, 0.7])
def test_PDCD_WS(quantile):
    n_samples, n_features = 50, 10
    X, y, _ = make_correlated_data(n_samples, n_features, random_state=123)

    # optimality condition for w = 0:
    # some g in subdiff pinball(y) must satisfy X.T @ g in alpha * subdiff ||.||_1(0)
    # hint: use max(x, 0) = (x + |x|) / 2 to derive subdiff pinball
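    # with that identity, pinball(r) = |r| / 2 + (quantile - 1/2) * r,
    # so sign(y)/2 + (quantile - 1/2) is a subgradient of pinball at y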
    alpha_max = norm(X.T @ (np.sign(y)/2 + (quantile - 0.5)), ord=np.inf)
    alpha = alpha_max / 5

    w = PDCD_WS(
        dual_init=np.sign(y)/2 + (quantile - 0.5)
    ).solve(X, y, Pinball(quantile), L1(alpha))[0]

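    # sklearn's QuantileRegressor minimizes the mean pinball loss while skglm
    # minimizes its sum, hence the alpha / n_samples rescaling below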
    clf = QuantileRegressor(
        quantile=quantile,
        alpha=alpha/n_samples,
        fit_intercept=False
    ).fit(X, y)

    np.testing.assert_allclose(w, clf.coef_, atol=1e-5)


if __name__ == '__main__':
    pass