Implement sample weight #324

Open

wants to merge 25 commits into base: master

Changes from 1 commit

Commits (25)
0bfe5f3
add manifest to simplify non-develop install
peterfoley Jan 23, 2019
87423f8
record the number of iterations and convergence status
peterfoley Mar 1, 2019
5ba9fab
test glmnet with nonzero reg_lambda, alpha
peterfoley Mar 5, 2019
34be299
recalculate z every iteration in GLM._cdfast
peterfoley Mar 5, 2019
2350c7e
flake8 fixes in test
peterfoley Mar 5, 2019
48d6759
don't cache z outside _cdfast
peterfoley Mar 6, 2019
42315f8
remove MANIFEST.in so it can be created properly in a later PR
peterfoley Mar 6, 2019
d80b11f
remove a dangling creation of z cache
peterfoley Mar 6, 2019
92238b2
resolved remaining flake8 issues by disabling checks
peterfoley Mar 6, 2019
2c06fbb
resolve flake8 indentation error
peterfoley Mar 6, 2019
a9209f5
update test_cdfast to remove z from _cdfast interface
peterfoley Mar 6, 2019
2674333
fail test_glmnet based on loss increase runlength
peterfoley Mar 6, 2019
5175d24
mkl dylibs are unavailable on travis
peterfoley Mar 6, 2019
2e29239
add a test that uses sample_weight parameter
peterfoley Dec 13, 2018
192d938
implement sample weights
peterfoley Dec 13, 2018
bc9b9df
update cheatsheet with weighted loss and grad/hess calculations
peterfoley Dec 14, 2018
832452b
typo fixes and formatting cleanup to reduce flake8 warnings/errors
peterfoley Jan 4, 2019
d26abe9
have setuptools build package list
peterfoley Jan 23, 2019
4e97389
remove math.inf for python 2.7 compatibility
peterfoley Mar 7, 2019
6c250a0
merging cdfast convergence fixes
peterfoley Mar 7, 2019
217dc5f
flake8 fixes
peterfoley Mar 7, 2019
cc0bd9c
resolve indentation flake8 errors
peterfoley Mar 7, 2019
8a6dec4
logger.warn is deprecated in favor of logger.warning
peterfoley Mar 7, 2019
e8f038a
use scipy.special.comb instead of removed scipy.misc.comb
May 17, 2019
8255617
Merge pull request #1 from peterfoley605/fix_scipy
May 17, 2019
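
The sample-weight commits above (192d938, bc9b9df) weight each observation's contribution to the loss and to its gradient/Hessian. As a rough illustration of the usual convention only, not the PR's actual code (the function names and the normalization by the total weight are assumptions), a weighted Poisson loss and gradient might look like:

import numpy as np

def weighted_poisson_loss(beta0, beta, X, y, w):
    # Weighted Poisson negative log-likelihood (up to constants):
    # each observation i contributes w_i * (mu_i - y_i * eta_i).
    eta = beta0 + X.dot(beta)   # linear predictor
    mu = np.exp(eta)            # Poisson mean
    return np.sum(w * (mu - y * eta)) / np.sum(w)

def weighted_poisson_grad(beta0, beta, X, y, w):
    # Gradient of the weighted loss with respect to beta:
    # each row's contribution is scaled by its weight w_i.
    mu = np.exp(beta0 + X.dot(beta))
    return X.T.dot(w * (mu - y)) / np.sum(w)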
record the number of iterations and convergence status
also log a warning if GLM.fit() doesn't converge
peterfoley committed Mar 1, 2019
commit 87423f8bea02416da6a8d4f4d6341b60b8c239e1
33 changes: 29 additions & 4 deletions pyglmnet/pyglmnet.py
@@ -1,6 +1,7 @@
 """Python implementation of elastic-net regularized GLMs."""
 
 from copy import deepcopy
+from math import inf
 
 import numpy as np
 from scipy.special import expit
@@ -478,6 +479,8 @@ def __init__(self, distr='poisson', alpha=0.5,
         self.solver = solver
         self.learning_rate = learning_rate
         self.max_iter = max_iter
+        self.n_iter = 0
+        self.converged = False
         self.beta0_ = None
         self.beta_ = None
         self.ynull_ = None
@@ -717,14 +720,21 @@ def fit(self, X, y):
 
         # Iterative updates
         for t in range(0, self.max_iter):
+            logger.info("t: %i" % t)
+            convergence_metric = inf
             if self.solver == 'batch-gradient':
                 grad = _grad_L2loss(self.distr,
                                     alpha, self.Tau,
                                     reg_lambda, X, y, self.eta,
                                     beta)
                 # Converged if the norm(gradient) < tol
-                if (t > 1) and (np.linalg.norm(grad) < tol):
-                    msg = ('\tConverged in {0:d} iterations'.format(t))
+                convergence_metric = np.linalg.norm(grad)
+                logger.info("convergence_metric: %f" % convergence_metric)
+                if (t > 1) and (convergence_metric < tol):
+                    self.converged = True
+                    self.n_iter += t
+                    msg = ('\tConverged in {0:d} iterations'
+                           .format(self.n_iter))
                     logger.info(msg)
                     break
                 beta = beta - self.learning_rate * grad
@@ -734,8 +744,13 @@
                 beta, z = \
                     self._cdfast(X, y, z, ActiveSet, beta, reg_lambda)
                 # Converged if the norm(update) < tol
-                if (t > 1) and (np.linalg.norm(beta - beta_old) < tol):
-                    msg = ('\tConverged in {0:d} iterations'.format(t))
+                convergence_metric = np.linalg.norm(beta - beta_old)
+                logger.info("convergence_metric: %f" % convergence_metric)
+                if (t > 1) and (convergence_metric < tol):
+                    self.converged = True
+                    self.n_iter += t
+                    msg = ('\tConverged in {0:d} iterations'
+                           .format(self.n_iter))
                     logger.info(msg)
                     break
             # Apply proximal operator
@@ -749,6 +764,16 @@
             # Compute and save loss if callbacks are requested
             if callable(self.callback):
                 self.callback(beta)
+        else:
+            # Warn if it hit max_iter without converging
+            self.converged = False
+            self.n_iter += t + 1
+            msg = ('\t'
+                   'Failed to converge after {0:d} iterations.'
+                   ' Last convergence metric was {1:f}'
+                   ' and convergence threshold {2:f}.'
+                   ).format(self.n_iter, convergence_metric, tol)
+            logger.warn(msg)
 
         # Update the estimated variables
         self.beta0_ = beta[0]
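
With this commit, GLM.fit() tracks self.n_iter and self.converged, and the for/else clause (which runs only when the loop exits without hitting break) logs a warning if max_iter is reached before the tolerance is met. A minimal usage sketch, assuming the package is installed from this branch; the data and constructor arguments are illustrative, not prescribed by the PR:

import numpy as np
from pyglmnet import GLM

X = np.random.randn(200, 5)
y = np.random.poisson(np.exp(0.5 * X[:, 0]))

glm = GLM(distr='poisson', max_iter=100)
glm.fit(X, y)

# Attributes added by this commit
if glm.converged:
    print("converged in %d iterations" % glm.n_iter)
else:
    print("stopped at max_iter after %d iterations" % glm.n_iter)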