Implement sample weight #324

Open: wants to merge 25 commits into base: master

Changes from 1 commit

Commits (25):
0bfe5f3  add manifest to simplify non-develop install (peterfoley, Jan 23, 2019)
87423f8  record the number of iterations and convergence status (peterfoley, Mar 1, 2019)
5ba9fab  test glmnet with nonzero reg_lambda, alpha (peterfoley, Mar 5, 2019)
34be299  recalculate z every iteration in GLM._cdfast (peterfoley, Mar 5, 2019)
2350c7e  flake8 fixes in test (peterfoley, Mar 5, 2019)
48d6759  don't cache z outside _cdfast (peterfoley, Mar 6, 2019)
42315f8  remove MANIFEST.in so it can be created properly in a later PR (peterfoley, Mar 6, 2019)
d80b11f  remove a dangling creation of z cache (peterfoley, Mar 6, 2019)
92238b2  resolved remaining flake8 issues by disabling checks (peterfoley, Mar 6, 2019)
2c06fbb  resolve flake8 indentation error (peterfoley, Mar 6, 2019)
a9209f5  update test_cdfast to remove z from _cdfast interface (peterfoley, Mar 6, 2019)
2674333  fail test_glmnet based on loss increase runlength (peterfoley, Mar 6, 2019)
5175d24  mkl dylibs are unavailable on travis (peterfoley, Mar 6, 2019)
2e29239  add a test that uses sample_weight parameter (peterfoley, Dec 13, 2018)
192d938  implement sample weights (peterfoley, Dec 13, 2018)
bc9b9df  update cheatsheet with weighted loss and grad/hess calculations (peterfoley, Dec 14, 2018)
832452b  typo fixes and formatting cleanup to reduce flake8 warnings/errors (peterfoley, Jan 4, 2019)
d26abe9  have setuptools build package list (peterfoley, Jan 23, 2019)
4e97389  remove math.inf for python 2.7 compatibility (peterfoley, Mar 7, 2019)
6c250a0  merging cdfast convergence fixes (peterfoley, Mar 7, 2019)
217dc5f  flake8 fixes (peterfoley, Mar 7, 2019)
cc0bd9c  resolve indentation flake8 errors (peterfoley, Mar 7, 2019)
8a6dec4  logger.warn is deprecated in favor of logger.warning (peterfoley, Mar 7, 2019)
e8f038a  use scipy.special.comb instead of removed scipy.misc.comb (May 17, 2019)
8255617  Merge pull request #1 from peterfoley605/fix_scipy (May 17, 2019)
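
For context on the commits above that implement sample weights (192d938, 2e29239, bc9b9df): the standard way to support per-sample weights in a GLM is to scale each observation's contribution to the loss, gradient, and Hessian by its (normalized) weight while leaving the regularization penalty unchanged. The sketch below illustrates that idea for a Gaussian (identity-link) model with an elastic-net penalty; it is a minimal illustration with a hypothetical helper name (weighted_loss_grad), not the pyglmnet internals, which are not shown in this one-commit view.

import numpy as np

def weighted_loss_grad(beta, X, y, w, reg_lambda, alpha=0.5):
    """Weighted Gaussian loss and gradient with an elastic-net penalty.

    Each sample's squared error is scaled by its normalized weight;
    the penalty term does not depend on the weights.
    """
    w = np.asarray(w, dtype=float)
    w = w / w.sum()                          # normalize weights to sum to 1
    resid = y - X.dot(beta)
    loss = 0.5 * np.sum(w * resid ** 2)
    loss += reg_lambda * ((1 - alpha) * 0.5 * np.sum(beta ** 2) +
                          alpha * np.sum(np.abs(beta)))
    grad = -X.T.dot(w * resid)               # weighted data term
    grad += reg_lambda * ((1 - alpha) * beta +
                          alpha * np.sign(beta))  # l1 subgradient
    return loss, grad

If the fit interface matches the test added in 2e29239, calling code would presumably look like glm.fit(X, y, sample_weight=w), with uniform weights reproducing the unweighted fit; the exact signature is not visible in this one-commit view.
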
Commit 2350c7e6dff8ab8b98f4ddd271e7a00f2fd71abc ("flake8 fixes in test"), committed by peterfoley on Mar 5, 2019.
tests/test_pyglmnet.py: 69 changes (40 additions, 29 deletions)

@@ -179,7 +179,7 @@ def test_glmnet():
 
             X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
             y_train = simulate_glm(distr, beta0, beta, X_train,
-                    sample=False)
+                                   sample=False)
             alpha = 0.5
             loss_trace = list()
 
@@ -190,54 +190,65 @@ def callback(beta):
 
                 loss_trace.append(
                     _loss(distr, alpha, Tau, reg_lambda,
-                         X_train, y_train, eta, group, beta))
+                          X_train, y_train, eta, group, beta))
 
                 glm = GLM(distr, learning_rate=learning_rate,
-                    reg_lambda=reg_lambda, tol=1e-3, max_iter=5000,
-                    alpha=alpha, solver=solver, score_metric=score_metric,
-                    random_state=random_state, callback=callback)
+                          reg_lambda=reg_lambda, tol=1e-3, max_iter=5000,
+                          alpha=alpha, solver=solver,
+                          score_metric=score_metric,
+                          random_state=random_state, callback=callback)
                 assert(repr(glm))
 
                 glm.fit(X_train, y_train)
 
                 # verify loss decreases or increases only slightly
-                loss_trace_decreased = np.diff(loss_trace) <= 1e-3
+                loss_trace_decreased = np.diff(loss_trace) <= 1e-3
                 if np.any(~loss_trace_decreased):
-                    idx_of_first_increase = 2+np.min(np.nonzero(~loss_trace_decreased))
-                    assert np.all(loss_trace_decreased), (
-                        'Loss increased between iterations'
-                        ' on distr={d} solver={s} with reg_lambda={l}'
-                        '\n Loss trace:\n {tr}'
-                    ).format(d=distr, s=solver, l=reg_lambda,
-                             tr=loss_trace[:idx_of_first_increase])
+                    idx_of_first_increase = 2 + \
+                        np.min(np.nonzero(~loss_trace_decreased))
+                    assert np.all(loss_trace_decreased), \
+                        ('Loss increased between iterations'
+                         ' on distr={d} solver={s} with'
+                         ' reg_lambda={rl}'
+                         '\n Loss trace:\n {lt}'
+                         ).format(d=distr, s=solver,
+                                  rl=reg_lambda,
+                                  lt=loss_trace[:idx_of_first_increase])
 
                 if reg_lambda == 0.0:
-                    # check that the true model can be recreated almost perfectly
-                    # when no regularization is applied
+                    # check that the true model can be recreated
+                    # almost perfectly when no regularization is applied
                     # verify loss at convergence = loss when beta=beta_
-                    l_true = _loss(distr, alpha, np.eye(beta.shape[0]), reg_lambda,
-                                   X_train, y_train, 2.0, None,
-                                   np.concatenate(([beta0], beta)))
-                    assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5,
-                        err_msg=('Final loss trace value different from true loss '
-                                 ' on distr={d} solver={s} with reg_lambda={l}'
-                                 ).format(d=distr, s=solver, l=reg_lambda))
+                    l_true = _loss(distr, alpha, np.eye(beta.shape[0]),
+                                   reg_lambda, X_train, y_train, 2.0, None,
+                                   np.concatenate(([beta0], beta)))
+                    assert_allclose(loss_trace[-1], l_true,
+                                    rtol=1e-4, atol=1e-5,
+                                    err_msg=('Final loss trace value different'
+                                             ' from true loss '
+                                             ' on distr={d} solver={s}'
+                                             ' with reg_lambda={rl}'
+                                             ).format(d=distr, s=solver,
+                                                      rl=reg_lambda))
                     # beta=beta_ when reg_lambda = 0.
                     assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2,
-                                    err_msg=('Fitted beta too different from true beta '
-                                             'in distr={} solver={}'.format(distr, solver)))
+                                    err_msg=('Fitted beta too different'
+                                             ' from true beta'
+                                             ' in distr={} solver={}'
+                                             ).format(distr, solver))
                 betas_.append(glm.beta_)
 
                 y_pred = glm.predict(X_train)
-                assert y_pred.shape[0] == X_train.shape[0], (
-                    'Fitted values have wrong number of rows in '
-                    ' on distr={d} solver={s} with reg_lambda={l}'
-                ).format(d=distr, s=solver, l=reg_lambda)
+                assert y_pred.shape[0] == X_train.shape[0], \
+                    ('Fitted values have wrong number of rows in '
+                     ' on distr={d} solver={s} with reg_lambda={rl}'
+                     ).format(d=distr, s=solver, rl=reg_lambda)
 
             # compare all solvers pairwise to make sure they're close
            for i, first_beta in enumerate(betas_[:-1]):
                for second_beta in betas_[i + 1:]:
-                    assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)
+                    assert_allclose(first_beta, second_beta,
+                                    rtol=0.05, atol=1e-2)
 
     # test fit_predict
     glm_poisson = GLM(distr='softplus')
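
The reformatted assertion above is easier to follow in isolation: the loss trace recorded by the callback must be non-increasing up to a small tolerance, and if it ever rises, the failure message reports the trace up to the first offending iteration. A standalone sketch of that check, using the hypothetical helper name check_loss_trace rather than anything defined in pyglmnet:

import numpy as np

def check_loss_trace(loss_trace, tol=1e-3):
    """Raise if the loss increases by more than `tol` between iterations."""
    loss_trace = np.asarray(loss_trace, dtype=float)
    decreased = np.diff(loss_trace) <= tol
    if np.any(~decreased):
        # include everything up to and just past the first increase
        idx_of_first_increase = 2 + np.min(np.nonzero(~decreased))
        raise AssertionError(
            'Loss increased between iterations:\n {}'.format(
                loss_trace[:idx_of_first_increase]))

# example: the increase from 7.5 to 7.9 at step 3 would trigger the failure
# check_loss_trace([10.0, 8.0, 7.5, 7.9, 7.0])  # raises AssertionError
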