average eta

fgregg · Oct 11, 2016 · 351b1ea · 351b1ea
1 parent d808ce9
commit 351b1ea
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 43 deletions.
diff --git a/pseudolikelihood/centered_potts.py b/pseudolikelihood/centered_potts.py
@@ -83,7 +83,7 @@ def predict_proba(self, X, y):
 
  spatial = safe_sparse_dot(A, (Y_multi - p_nonspatial))[:, :-1]
 
- p += (eta * spatial)
+ p += eta.T * np.array(spatial/A.sum(axis=1))
 
  p = np.hstack((p, np.zeros((features.shape[0], 1))))
 
@@ -135,6 +135,8 @@ def _multinomial_loss(w, features, A, Y, alpha, sample_weight):
  """
  n_classes = Y.shape[1]
  n_features = features.shape[1]
+ n_neighbors = A.sum(axis=1).reshape(-1, 1)
+
  w = w.reshape(n_classes - 1, -1)
  sample_weight = sample_weight[:, np.newaxis]
  intercept = w[:, 0]
@@ -148,9 +150,10 @@ def _multinomial_loss(w, features, A, Y, alpha, sample_weight):
  p_nonspatial -= logsumexp(p_nonspatial, axis=1)[:, np.newaxis]
  p_nonspatial = np.exp(p_nonspatial, p_nonspatial)
 
- spatial = safe_sparse_dot(A, (Y - p_nonspatial))[:, :-1]
+ spatial = safe_sparse_dot(A, (Y - p_nonspatial))[:, :-1]/n_neighbors
+ spatial[np.isnan(spatial)] = 0
 
- p += (eta.T * spatial)
+ p += eta.T * np.array(spatial)
 
  p = np.hstack((p, np.zeros((features.shape[0], 1))))
 
@@ -193,6 +196,8 @@ def _multinomial_loss_grad(w, features, A, Y, alpha, sample_weight):
  """
  n_classes = Y.shape[1]
  n_features = features.shape[1]
+ n_neighbors = A.sum(axis=1).reshape(-1, 1)
+
  grad = np.zeros((n_classes - 1, n_features + 2))
 
  loss, p_nonspatial, p, w = _multinomial_loss(w, features, A, Y,
@@ -205,12 +210,16 @@ def _multinomial_loss_grad(w, features, A, Y, alpha, sample_weight):
  for c in range(n_classes - 1):
  mu = p_nonspatial[:, c].copy().reshape(-1, 1)
  mu *= (1 - mu)
- centered_features = features - w[c, -1] * safe_sparse_dot(A, features * mu)
+ spatial = safe_sparse_dot(A, features * mu)/n_neighbors
+ spatial[np.isnan(spatial)] = 0
+ centered_features = features - w[c, -1] * spatial
+
  grad[c, :(n_features + 1)] = safe_sparse_dot(diff[:, c].T, centered_features)
 
- spatial = safe_sparse_dot(A, (Y - p_nonspatial))[:, :-1]
+ spatial = safe_sparse_dot(A, (Y - p_nonspatial))[:, :-1]/n_neighbors
+ spatial[np.isnan(spatial)] = 0
 
- grad[:, -1] = (spatial * diff).sum(axis=0)
+ grad[:, -1] = (diff * np.array(spatial)).sum(axis=0)
 
  grad[:, 1:n_features + 2] += alpha * w[:, 1:]
 
@@ -223,14 +232,15 @@ def rpotts(X, model):
 
  n_classes = len(model.classes_)
  n_sites = features.shape[0]
+ n_neighbors = A.sum(axis=1).reshape(-1, 1)
 
  R = np.random.uniform(size=(n_sites, 1))
 
  lower = np.empty((n_sites, 1))
  upper = np.empty((n_sites, 1))
 
  betas = model.coef_[:, :-1]
- eta = model.coef_[:, -1]
+ eta = model.coef_[:, -1:]
 
  p = safe_sparse_dot(features, betas.T, dense_output=True)
  p += model.intercept_
@@ -253,16 +263,18 @@ def rpotts(X, model):
  r = r.reshape(-1, 1)
 
  upper_multi = _target(upper)
- upper_spatial = safe_sparse_dot(A, (upper_multi - p_nonspatial))[:, :-1]
+ upper_spatial = safe_sparse_dot(A, (upper_multi - p_nonspatial))[:, :-1]/n_neighbors
+ upper_spatial[np.isnan(upper_spatial)] = 0
 
- upper_p = p + eta * upper_spatial
+ upper_p = p + (eta.T * np.array(upper_spatial))
  upper_p = softmax(np.hstack((upper_p, np.zeros((features.shape[0], 1)))))
  upper_p = upper_p.cumsum(axis=1)
 
  lower_multi = _target(lower)
- lower_spatial = safe_sparse_dot(A, (lower_multi - p_nonspatial))[:, :-1]
+ lower_spatial = safe_sparse_dot(A, (lower_multi - p_nonspatial))[:, :-1]/n_neighbors
+ lower_spatial[np.isnan(lower_spatial)] = 0
 
- lower_p = p + eta * lower_spatial
+ lower_p = p + (eta.T * np.array(lower_spatial))
  lower_p = softmax(np.hstack((lower_p, np.zeros((features.shape[0], 1)))))
  lower_p = lower_p.cumsum(axis=1)
 

diff --git a/tests/test_rpotts.py b/tests/test_rpotts.py
@@ -52,22 +52,24 @@ def setUp(self):
 
  self.Y_multi = potts.lbin.transform(Z)
  self.Z = Z
-
+
+ self.avg_neighbors = A.sum(axis=1).mean()
+
  def test_fit(self):
- potts = CenteredPotts(C=100000000000000000000)
+ potts = CenteredPotts(C=float('inf'))
  potts.fit((self.X, self.A), self.Z)
  assert_array_almost_equal(potts.coef_,
- np.array([[ 2.602, 1.298, 0.801]]),
+ np.array([[ 2.289, 1.796, 2.857]]),
  3)
  assert_array_almost_equal(potts.intercept_,
- np.array([-0.155]),
+ np.array([-0.092]),
  3)
 
 
  def test_gradient(self):
- w = np.array([[0, 1, 1, 0.5]])
- features = self.X
+ w = np.array([[0, 1, 1, 0.5 * self.avg_neighbors]])
  A = self.A
+ features = self.X
  Y = self.Y_multi
  alpha = 0
  sample_weight = np.ones(features.shape[0])
@@ -76,21 +78,22 @@ def test_gradient(self):
  loss, grad, p = out
 
  assert_array_almost_equal(grad,
- np.array([  1.955, -3.102, -1.857, -24.036,]),
+ np.array([ 1.495, -2.708, -2.25 , -5.798]),
  3)
 
  def test_loss(self):
- w = np.array([[0, 1, 1, 0.5]])
- features = self.X
+
+ w = np.array([[0, 1, 1, 0.5 * self.avg_neighbors]])
  A = self.A
+ features = self.X
  Y = self.Y_multi
  alpha = 0
  sample_weight = np.ones(features.shape[0])
 
  out = cp._multinomial_loss(w, features, A, Y, alpha, sample_weight)
  loss, p_nonspatial, p, out_w = out
 
- assert_approx_equal(loss, 231.7263)
+ assert_approx_equal(loss, 232.612, 3)
  assert_array_almost_equal(p_nonspatial.sum(axis=1), np.ones(features.shape[0]))
  assert_array_almost_equal(p.sum(axis=1), np.ones(features.shape[0]))
  assert_array_almost_equal(w, out_w)
@@ -103,29 +106,29 @@ def test_sampler(self):
  potts = CenteredPotts(C=float('inf'))
  potts.fit((features, A), Z)
 
- target = [[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
+ target = [[1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
  1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,
- 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
- 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
- 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
+ 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
+ 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1,
  1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1,
  0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
  0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0,
- 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
- 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
+ 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1,
+ 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
  1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
- 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
- 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
- 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
- 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0,
- 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
- 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1,
- 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1,
- 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
- 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
+ 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0,
+ 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0,
+ 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0,
+ 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
+ 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
+ 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1,
+ 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
+ 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
  0, 1, 0, 1, 0, 0, 0, 0, 1]]
 
  import random
@@ -152,6 +155,7 @@ def setUp(self):
  potts.fit((X, A), self.Z)
 
  self.Y_multi = potts.lbin.transform(self.Z)
+ self.avg_neighbors = A.sum(axis=1).mean()
 
 
  def test_loss(self):
@@ -167,15 +171,15 @@ def test_loss(self):
  out = cp._multinomial_loss(w, features, A, Y, alpha, sample_weight)
  loss, p_nonspatial, p, out_w = out
 
- assert_approx_equal(loss, 475.72979)
+ assert_approx_equal(loss, 450.849, 3)
  assert_array_almost_equal(p_nonspatial.sum(axis=1), np.ones(features.shape[0]))
  assert_array_almost_equal(p.sum(axis=1), np.ones(features.shape[0]))
  assert_array_almost_equal(w, out_w)
 
 
  def test_gradient(self):
- w = np.array([[0, 1, 1, 0.5],
- [0, 1, 1, 0.5]])
+ w = np.array([[0, 1, 1, 0.5 * self.avg_neighbors],
+ [0, 1, 1, 0.5 * self.avg_neighbors]])
  features = self.X
  A = self.A
  Y = self.Y_multi
@@ -186,6 +190,6 @@ def test_gradient(self):
  loss, grad, p = out
 
  assert_array_almost_equal(grad,
- np.array([ 5.281, 1.712, 2.13 , 47.858,
-  0.3, 1.696, 1.798, 52.718]),
+ np.array([5.488, 1.661, 2.319, 12.916,
+ 0.063, 1.632, 1.543, 14.744]),
  3)