Implement sample weight #324

Open: wants to merge 25 commits into base master

Changes from 1 commit

Commits (25)
0bfe5f3  add manifest to simplify non-develop install  (peterfoley, Jan 23, 2019)
87423f8  record the number of iterations and convergence status  (peterfoley, Mar 1, 2019)
5ba9fab  test glmnet with nonzero reg_lambda, alpha  (peterfoley, Mar 5, 2019)
34be299  recalculate z every iteration in GLM._cdfast  (peterfoley, Mar 5, 2019)
2350c7e  flake8 fixes in test  (peterfoley, Mar 5, 2019)
48d6759  don't cache z outside _cdfast  (peterfoley, Mar 6, 2019)
42315f8  remove MANIFEST.in so it can be created properly in a later PR  (peterfoley, Mar 6, 2019)
d80b11f  remove a dangling creation of z cache  (peterfoley, Mar 6, 2019)
92238b2  resolved remaining flake8 issues by disabling checks  (peterfoley, Mar 6, 2019)
2c06fbb  resolve flake8 indentation error  (peterfoley, Mar 6, 2019)
a9209f5  update test_cdfast to remove z from _cdfast interface  (peterfoley, Mar 6, 2019)
2674333  fail test_glmnet based on loss increase runlength  (peterfoley, Mar 6, 2019)
5175d24  mkl dylibs are unavailable on travis  (peterfoley, Mar 6, 2019)
2e29239  add a test that uses sample_weight parameter  (peterfoley, Dec 13, 2018)
192d938  implement sample weights  (peterfoley, Dec 13, 2018)
bc9b9df  update cheatsheet with weighted loss and grad/hess calculations  (peterfoley, Dec 14, 2018)
832452b  typo fixes and formatting cleanup to reduce flake8 warnings/errors  (peterfoley, Jan 4, 2019)
d26abe9  have setuptools build package list  (peterfoley, Jan 23, 2019)
4e97389  remove math.inf for python 2.7 compatibility  (peterfoley, Mar 7, 2019)
6c250a0  merging cdfast convergence fixes  (peterfoley, Mar 7, 2019)
217dc5f  flake8 fixes  (peterfoley, Mar 7, 2019)
cc0bd9c  resolve indentation flake8 errors  (peterfoley, Mar 7, 2019)
8a6dec4  logger.warn is deprecated in favor of logger.warning  (peterfoley, Mar 7, 2019)
e8f038a  use scipy.special.comb instead of removed scipy.misc.comb  (May 17, 2019)
8255617  Merge pull request #1 from peterfoley605/fix_scipy  (May 17, 2019)
typo fixes and formatting cleanup to reduce flake8 warnings/errors
peterfoley committed Mar 7, 2019
commit 832452b066aaef0298c71916448fec4cf211b599
3 changes: 3 additions & 0 deletions pyglmnet/metrics.py
@@ -3,6 +3,7 @@
import numpy as np
from .pyglmnet import _logL


def deviance(y, yhat, sample_weight, distr):
"""Deviance metrics.

@@ -34,6 +35,7 @@ def deviance(y, yhat, sample_weight, distr):
score = -2 * (L1 - LS)
return score


def pseudo_R2(X, y, yhat, ynull_, sample_weight, distr):
"""Pseudo-R2 metric.

@@ -73,6 +75,7 @@ def pseudo_R2(X, y, yhat, ynull_, sample_weight, distr):
score = (1 - L1 / L0)
return score


def accuracy(y, yhat, sample_weight):
"""Accuracy as ratio of correct predictions.

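For readers skimming the metrics changes: deviance and pseudo_R2 delegate to the weighted _logL, so in the Gaussian case they reduce to simple weighted sums of squared errors. A minimal standalone sketch of what these scores compute (plain NumPy, illustrative function names, not the library's actual helpers):

    import numpy as np

    def gaussian_weighted_deviance(y, yhat, w):
        # deviance = -2 * (logL(model) - logL(saturated)); the saturated
        # Gaussian log-likelihood is 0 because it predicts y exactly
        logl_model = -0.5 * np.sum(w * (y - yhat) ** 2)
        return -2 * (logl_model - 0.0)

    def gaussian_weighted_pseudo_r2(y, yhat, ynull, w):
        # pseudo-R2 = 1 - logL(model) / logL(null), where the null model
        # predicts the weighted mean ynull for every sample
        logl_model = -0.5 * np.sum(w * (y - yhat) ** 2)
        logl_null = -0.5 * np.sum(w * (y - ynull) ** 2)
        return 1.0 - logl_model / logl_null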
41 changes: 24 additions & 17 deletions pyglmnet/pyglmnet.py
@@ -116,29 +116,31 @@ def _logL(distr, y, y_hat, w, z=None):
"""The log likelihood."""
if distr in ['softplus', 'poisson']:
eps = np.spacing(1)
logL = np.dot(w, y * np.log(y_hat + eps) - y_hat)
logL = np.sum(w * (y * np.log(y_hat + eps) - y_hat))
elif distr == 'gaussian':
logL = -0.5 * np.dot(w, (y - y_hat)**2)
logL = -0.5 * np.sum(w * ((y - y_hat) ** 2))
elif distr == 'binomial':

# prevents underflow
if z is not None:
logL = np.dot(w, y * z - np.log(1 + np.exp(z)))
logL = np.sum(w * (y * z - np.log(1 + np.exp(z))))
# for scoring
else:
logL = np.dot(w, y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
logL = np.sum(w * (y * np.log(y_hat) +
(1 - y) * np.log(1 - y_hat)))
elif distr == 'probit':
if z is not None:
pdfz, cdfz = norm.pdf(z), norm.cdf(z)
logL = np.dot(w, y * _probit_g1(z, pdfz, cdfz) +
(1 - y) * _probit_g2(z, pdfz, cdfz))
logL = np.sum(w * (y * _probit_g1(z, pdfz, cdfz) +
(1 - y) * _probit_g2(z, pdfz, cdfz)))
else:
logL = np.dot(w, y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
logL = np.sum(w * (y * np.log(y_hat) +
(1 - y) * np.log(1 - y_hat)))
elif distr == 'gamma':
# see
# https://www.statistics.ma.tum.de/fileadmin/w00bdb/www/czado/lec8.pdf
nu = 1. # shape parameter, exponential for now
logL = np.dot(w, nu * (-y / y_hat - np.log(y_hat)))
logL = np.sum(w * (nu * (-y / y_hat - np.log(y_hat))))
return logL


@@ -183,6 +185,7 @@ def _L1penalty(beta, group=None):
L1penalty += np.linalg.norm(beta[group == 0], 1)
return L1penalty


def _loss(distr, alpha, Tau, reg_lambda, X, y, w, eta, group, beta):
"""Define the objective function for elastic net."""
n_samples = X.shape[0]
@@ -193,6 +196,7 @@ def _loss(distr, alpha, Tau, reg_lambda, X, y, w, eta, group, beta):
J = -L + reg_lambda * P
return J


def _L2loss(distr, alpha, Tau, reg_lambda, X, y, w, eta, group, beta):
"""Define the objective function for elastic net."""
n_samples = X.shape[0]
@@ -250,6 +254,7 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, w, eta, beta):
g[1:] = grad_beta
return g


def _gradhess_logloss_1d(distr, xk, y, w, z, eta):
"""
Compute gradient (1st derivative)
@@ -281,7 +286,8 @@ def _gradhess_logloss_1d(distr, xk, y, w, z, eta):

grad_s = s * (1 - s)
grad_s_by_mu = grad_s / mu - s / (mu ** 2)
hk = np.sum(w * grad_s * xk ** 2) - np.sum(w * y * grad_s_by_mu * xk ** 2)
hk = np.sum(w * grad_s * xk ** 2) - \
np.sum(w * y * grad_s_by_mu * xk ** 2)

elif distr == 'poisson':
mu = _mu(distr, z, eta)
@@ -662,7 +668,7 @@ def _cdfast(self, X, y, w, z, ActiveSet, beta, rl):
beta[k], z = beta[k] - update, z - update * xk
return beta, z

def fit(self, X, y, sample_weight = None):
def fit(self, X, y, sample_weight=None):
"""The fit function.

Parameters
@@ -785,7 +791,7 @@ def fit(self, X, y, sample_weight = None):
# Update the estimated variables
self.beta0_ = beta[0]
self.beta_ = beta[1:]
self.ynull_ = np.sum(sample_weight * y)/np.sum(sample_weight)
self.ynull_ = np.sum(sample_weight * y) / np.sum(sample_weight)
return self

def predict(self, X):
@@ -845,7 +851,7 @@ def predict_proba(self, X):
yhat = np.asarray(yhat)
return yhat

def fit_predict(self, X, y, sample_weight):
def fit_predict(self, X, y, sample_weight=None):
"""Fit the model and predict on the same data.

Parameters
@@ -862,7 +868,7 @@ def fit_predict(self, X, y, sample_weight):
"""
return self.fit(X, y, sample_weight).predict(X)

def score(self, X, y, sample_weight = None):
def score(self, X, y, sample_weight=None):
"""Score the model.

Parameters
@@ -1088,7 +1094,7 @@ def copy(self):
"""
return deepcopy(self)

def fit(self, X, y, sample_weight = None):
def fit(self, X, y, sample_weight=None):
"""The fit function.
Parameters
----------
@@ -1109,7 +1115,8 @@ def fit(self, X, y, sample_weight = None):
sample_weight = np.ones_like(y)
else:
sample_weight /= np.mean(sample_weight)
self.ynull_ = np.sum(sample_weight * y)/np.sum(sample_weight)

self.ynull_ = np.sum(sample_weight * y) / np.sum(sample_weight)

if not type(int):
raise ValueError('cv must be int. We do not support scikit-learn '
@@ -1202,7 +1209,7 @@ def predict_proba(self, X):
"""
return self.glm_.predict_proba(X)

def fit_predict(self, X, y, sample_weight = None):
def fit_predict(self, X, y, sample_weight=None):
"""Fit the model and predict on the same data.

Parameters
@@ -1220,7 +1227,7 @@ def fit_predict(self, X, y, sample_weight = None):
self.fit(X, y, sample_weight)
return self.glm_.predict(X)

def score(self, X, y, sample_weight = None):
def score(self, X, y, sample_weight=None):
"""Score the model.

Parameters
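The recurring pattern in pyglmnet.py is that every per-sample log-likelihood term is multiplied by its sample weight before being summed, and that weights default to ones and are rescaled to have mean 1. A minimal sketch of that convention, assuming equal-length NumPy arrays y, y_hat, and sample_weight (illustrative helper names, not the library's API):

    import numpy as np

    def normalize_weights(y, sample_weight=None):
        # default to unit weights; otherwise rescale so the weights average
        # to 1, mirroring the handling shown in the fit hunks above
        if sample_weight is None:
            return np.ones_like(y, dtype=float)
        return sample_weight / np.mean(sample_weight)

    def weighted_gaussian_logl(y, y_hat, w):
        # weighted counterpart of -0.5 * sum((y - y_hat) ** 2)
        return -0.5 * np.sum(w * (y - y_hat) ** 2)

    def weighted_poisson_logl(y, y_hat, w):
        eps = np.spacing(1)  # guards against log(0)
        return np.sum(w * (y * np.log(y_hat + eps) - y_hat))

With unit weights these reduce to the unweighted expressions, which is why the default sample_weight=None leaves existing behavior unchanged. The weighted null model used for scoring is then ynull = np.sum(w * y) / np.sum(w), as in the fit hunks above.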
7 changes: 4 additions & 3 deletions tests/test_pyglmnet.py
@@ -27,7 +27,7 @@ def test_sample_weight_cv():
glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
# check that cv and rest of sklearn interface works
cv_scores = cross_val_score(glm_normal, X, y, fit_params={'sample_weight': w}, cv=cv)
assert(len(scores) == 5)
assert(len(cv_scores) == 5)

param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
{'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
@@ -341,7 +341,8 @@ def test_cdfast():
z = beta_[0] + np.dot(X, beta_[1:])
k = 1
xk = X[:, k - 1]
gk, hk = _gradhess_logloss_1d(glm.distr, xk, y, z, glm.eta)
w = np.ones_like(y)
gk, hk = _gradhess_logloss_1d(glm.distr, xk, y, z, w, glm.eta)

# test grad and hess
if distr != 'multinomial':
@@ -359,7 +360,7 @@

# test cdfast
ActiveSet = np.ones(n_features + 1)
beta_ret, z_ret = glm._cdfast(X, y, z,
beta_ret, z_ret = glm._cdfast(X, y, w, z,
ActiveSet, beta_, glm.reg_lambda)
assert(beta_ret.shape == beta_.shape)
assert(z_ret.shape == z.shape)
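For reference, the sample_weight interface exercised by test_sample_weight_cv looks roughly like this at the call sites. X, y, w, and the cv value below are placeholder data (the test's actual setup is not shown in this hunk); only the GLM constructor arguments and the fit_params routing come from the diff:

    import numpy as np
    from sklearn.model_selection import cross_val_score
    from pyglmnet import GLM

    # toy data standing in for the test fixtures
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 5
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    w = rng.uniform(0.5, 1.5, size=n_samples)  # per-sample weights

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)

    # weights can be passed directly to fit / score ...
    glm_normal.fit(X, y, sample_weight=w)
    print(glm_normal.score(X, y, sample_weight=w))

    # ... or routed through scikit-learn's cross-validation via fit_params
    cv_scores = cross_val_score(glm_normal, X, y,
                                fit_params={'sample_weight': w}, cv=5)
    assert len(cv_scores) == 5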