Breaking change: parameters on a logarithmic scale.
After much thought, I see that it makes more sense to keep parameters
consistently on a logarithmic scale. It is better for numerical stability, and
is more "natural" for some of the algorithms (SciPy-based optimization,
approximate Bayesian inference).
lucasmaystre committed Jul 6, 2017
1 parent 9683242 commit 965e274
Showing 16 changed files with 167 additions and 156 deletions.
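
The changes below consistently replace natural-scale weights with zero-mean, log-scale parameters. The helpers log_transform and exp_transform that the updated modules import from choix.utils are not part of the hunks shown on this page; the following is only a sketch of what they plausibly look like, inferred from how they are used below (the real definitions live in choix/utils.py).

import numpy as np

def log_transform(weights):
    # Map positive, natural-scale weights to zero-mean log-scale parameters.
    params = np.log(weights)
    return params - params.mean()

def exp_transform(params):
    # Map log-scale parameters back to positive weights, normalized so that
    # the weights of n items sum to n (matching the old convention).
    weights = np.exp(np.asarray(params) - np.mean(params))
    return (len(weights) / weights.sum()) * weights
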
19 changes: 9 additions & 10 deletions choix/convergence.py
@@ -25,26 +25,25 @@ class NormOfDifferenceTest(ConvergenceTest):
"""Convergence test based on the norm of the difference vector.
This convergence test computes the difference between two successive
parameter vectors in log-space, and declares convergence when the norm of
this difference vector (normalized by the number of items) is below `tol`.
parameter vectors, and declares convergence when the norm of this
difference vector (normalized by the number of items) is below `tol`.
"""

def __init__(self, tol=1e-8, order=1):
self._tol = tol
self._ord = order
self._prev_thetas = None
self._prev_params = None

def __call__(self, params, update=True):
thetas = np.log(params)
thetas -= np.mean(thetas)
if self._prev_thetas is None:
params = np.asarray(params) - np.mean(params)
if self._prev_params is None:
if update:
self._prev_thetas = thetas
self._prev_params = params
return False
dist = np.linalg.norm(self._prev_thetas - thetas, ord=self._ord)
dist = np.linalg.norm(self._prev_params - params, ord=self._ord)
if update:
self._prev_thetas = thetas
return dist <= self._tol * len(thetas)
self._prev_params = params
return dist <= self._tol * len(params)


class ScalarFunctionTest(ConvergenceTest):
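
A small usage sketch of the updated test follows (assuming NormOfDifferenceTest is imported from choix.convergence). Since parameters are already on the log scale, the test only mean-centres successive iterates before comparing them.

import numpy as np
from choix.convergence import NormOfDifferenceTest

test = NormOfDifferenceTest(tol=1e-8, order=1)
params = np.zeros(5)           # log-scale parameters for 5 items
print(test(params))            # False: the first call only stores the vector
print(test(params + 1e-12))    # True: a constant shift is removed by centring
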
44 changes: 22 additions & 22 deletions choix/lsr.py
@@ -3,17 +3,17 @@
import numpy as np

from .convergence import NormOfDifferenceTest
from .utils import statdist
from .utils import exp_transform, log_transform, statdist


def _init_lsr(n_items, alpha, initial_params):
"""Initialize the LSR Markov chain and the weights."""
if initial_params is None:
ws = np.ones(n_items)
weights = np.ones(n_items)
else:
ws = np.asarray(initial_params)
weights = exp_transform(initial_params)
chain = alpha * np.ones((n_items, n_items), dtype=float)
return ws, chain
return weights, chain


def _ilsr(n_items, data, alpha, params, max_iter, tol, lsr_fun):
@@ -63,15 +63,15 @@ def lsr_pairwise(n_items, data, alpha=0.0, initial_params=None):
params : np.array
An estimate of model parameters.
"""
ws, chain = _init_lsr(n_items, alpha, initial_params)
weights, chain = _init_lsr(n_items, alpha, initial_params)
for winner, loser in data:
chain[loser, winner] += 1 / (ws[winner] + ws[loser])
chain[loser, winner] += 1 / (weights[winner] + weights[loser])
chain -= np.diag(chain.sum(axis=1))
return statdist(chain)
return log_transform(statdist(chain))
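
As a usage sketch with made-up pairwise data, the estimate returned by lsr_pairwise is now on the log scale; exp_transform recovers positive weights when they are needed.

from choix.lsr import lsr_pairwise
from choix.utils import exp_transform

data = [(0, 1), (0, 1), (1, 2), (2, 0)]    # hypothetical (winner, loser) pairs
params = lsr_pairwise(n_items=3, data=data, alpha=0.01)   # log-scale estimate
weights = exp_transform(params)            # positive weights, if needed
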


def ilsr_pairwise(n_items, data, alpha=0.0, initial_params=None,
max_iter=100, tol=1e-8):
def ilsr_pairwise(
n_items, data, alpha=0.0, initial_params=None, max_iter=100, tol=1e-8):
"""Compute the ML estimate of model parameters using I-LSR.
This function computes the maximum-likelihood (ML) estimate of model
@@ -132,15 +132,15 @@ def rank_centrality(n_items, data, alpha=0.0):
params : np.array
An estimate of model parameters.
"""
ws, chain = _init_lsr(n_items, alpha, None)
_, chain = _init_lsr(n_items, alpha, None)
for winner, loser in data:
chain[loser, winner] += 1.0
# Transform the counts into ratios.
idx = chain > 0 # Indices (i,j) of non-zero entries.
chain[idx] = chain[idx] / (chain + chain.T)[idx]
# Finalize the Markov chain by adding the self-transition rate.
chain -= np.diag(chain.sum(axis=1))
return statdist(chain)
return log_transform(statdist(chain))


def lsr_rankings(n_items, data, alpha=0.0, initial_params=None):
@@ -174,20 +174,20 @@ def lsr_rankings(n_items, data, alpha=0.0, initial_params=None):
params : np.array
An estimate of model parameters.
"""
ws, chain = _init_lsr(n_items, alpha, initial_params)
weights, chain = _init_lsr(n_items, alpha, initial_params)
for ranking in data:
sum_ = ws.take(ranking).sum()
sum_ = weights.take(ranking).sum()
for i, winner in enumerate(ranking[:-1]):
val = 1.0 / sum_
for loser in ranking[i+1:]:
chain[loser, winner] += val
sum_ -= ws[winner]
sum_ -= weights[winner]
chain -= np.diag(chain.sum(axis=1))
return statdist(chain)
return log_transform(statdist(chain))
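
For the ranking-based estimators, each observation lists items from first (best) to last, as the inner loop above iterates over successive winners. A hypothetical usage sketch:

from choix.lsr import lsr_rankings

# Two made-up rankings over 4 items, best item first.
data = [(1, 0, 2, 3), (0, 1, 3, 2)]
params = lsr_rankings(n_items=4, data=data, alpha=0.01)    # log-scale estimate
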


def ilsr_rankings(n_items, data, alpha=0.0, initial_params=None,
max_iter=100, tol=1e-8):
def ilsr_rankings(
n_items, data, alpha=0.0, initial_params=None, max_iter=100, tol=1e-8):
"""Compute the ML estimate of model parameters using I-LSR.
This function computes the maximum-likelihood (ML) estimate of model
@@ -254,17 +254,17 @@ def lsr_top1(n_items, data, alpha=0.0, initial_params=None):
params : np.array
An estimate of model parameters.
"""
ws, chain = _init_lsr(n_items, alpha, initial_params)
weights, chain = _init_lsr(n_items, alpha, initial_params)
for winner, losers in data:
val = 1 / (ws.take(losers).sum() + ws[winner])
val = 1 / (weights.take(losers).sum() + weights[winner])
for loser in losers:
chain[loser, winner] += val
chain -= np.diag(chain.sum(axis=1))
return statdist(chain)
return log_transform(statdist(chain))


def ilsr_top1(n_items, data, alpha=0.0, initial_params=None,
max_iter=100, tol=1e-8):
def ilsr_top1(
n_items, data, alpha=0.0, initial_params=None, max_iter=100, tol=1e-8):
"""Compute the ML estimate of model parameters using I-LSR.
This function computes the maximum-likelihood (ML) estimate of model
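
One benefit of keeping every estimator on the same log scale is that estimates can be reused directly as starting points. A sketch, again with hypothetical data:

from choix.lsr import lsr_pairwise, ilsr_pairwise

data = [(0, 1), (1, 2), (2, 0), (0, 2)]    # hypothetical comparisons
rough = lsr_pairwise(n_items=3, data=data, alpha=0.01)
# The spectral estimate is already log-scale, so it can seed I-LSR as is;
# _init_lsr converts it back to weights internally via exp_transform.
params = ilsr_pairwise(n_items=3, data=data, alpha=0.01, initial_params=rough)
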
26 changes: 16 additions & 10 deletions choix/mm.py
@@ -4,6 +4,7 @@
import numpy as np

from .convergence import NormOfDifferenceTest
from .utils import log_transform, exp_transform


def _mm(n_items, data, initial_params, alpha, max_iter, tol, mm_fun):
@@ -16,32 +17,33 @@ def _mm(n_items, data, initial_params, alpha, max_iter, tol, mm_fun):
If the algorithm does not converge after `max_iter` iterations.
"""
if initial_params is None:
params = np.ones(n_items)
params = np.zeros(n_items)
else:
params = initial_params
converged = NormOfDifferenceTest(tol=tol, order=1)
for _ in range(max_iter):
nums, denoms = mm_fun(n_items, data, params)
params = (nums + alpha) / (denoms + alpha)
params = (n_items / params.sum()) * params
params = log_transform((nums + alpha) / (denoms + alpha))
if converged(params):
return params
raise RuntimeError("Did not converge after {} iterations".format(max_iter))


def _mm_pairwise(n_items, data, params):
"""Inner loop of MM algorithm for pairwise data."""
weights = exp_transform(params)
wins = np.zeros(n_items, dtype=float)
denoms = np.zeros(n_items, dtype=float)
for winner, loser in data:
wins[winner] += 1.0
val = 1.0 / (params[winner] + params[loser])
val = 1.0 / (weights[winner] + weights[loser])
denoms[winner] += val
denoms[loser] += val
return wins, denoms


def mm_pairwise(n_items, data, initial_params=None, alpha=0.0,
def mm_pairwise(
n_items, data, initial_params=None, alpha=0.0,
max_iter=10000, tol=1e-8):
"""Compute the ML estimate of model parameters using the MM algorithm.
@@ -80,16 +82,17 @@ def mm_pairwise(n_items, data, initial_params=None, alpha=0.0,

def _mm_rankings(n_items, data, params):
"""Inner loop of MM algorithm for ranking data."""
weights = exp_transform(params)
wins = np.zeros(n_items, dtype=float)
denoms = np.zeros(n_items, dtype=float)
for ranking in data:
sum_ = params.take(ranking).sum()
sum_ = weights.take(ranking).sum()
for i, winner in enumerate(ranking[:-1]):
wins[winner] += 1
val = 1.0 / sum_
for item in ranking[i:]:
denoms[item] += val
sum_ -= params[winner]
sum_ -= weights[winner]
return wins, denoms


@@ -132,17 +135,19 @@ def mm_rankings(n_items, data, initial_params=None, alpha=0.0,

def _mm_top1(n_items, data, params):
"""Inner loop of MM algorithm for top1 data."""
weights = exp_transform(params)
wins = np.zeros(n_items, dtype=float)
denoms = np.zeros(n_items, dtype=float)
for winner, losers in data:
wins[winner] += 1
val = 1 / (params.take(losers).sum() + params[winner])
val = 1 / (weights.take(losers).sum() + weights[winner])
for item in itertools.chain([winner], losers):
denoms[item] += val
return wins, denoms


def mm_top1(n_items, data, initial_params=None, alpha=0.0,
def mm_top1(
n_items, data, initial_params=None, alpha=0.0,
max_iter=10000, tol=1e-8):
"""Compute the ML estimate of model parameters using the MM algorithm.
@@ -180,9 +185,10 @@ def mm_top1(n_items, data, initial_params=None, alpha=0.0,

def _choicerank(n_items, data, params):
"""Inner loop of ChoiceRank algorithm."""
weights = exp_transform(params)
adj, adj_t, traffic_in, traffic_out = data
# First phase of message passing.
zs = adj.dot(params)
zs = adj.dot(weights)
# Second phase of message passing.
denoms = adj_t.dot(traffic_out / zs)
return traffic_in, denoms
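
The same convention now applies to the MM estimators, which is a breaking change for warm-started calls: initial_params must be log-scale (zeros for the uniform model, where ones were used before). A hypothetical sketch:

import numpy as np
from choix.mm import mm_pairwise

data = [(0, 1), (1, 2), (2, 0)]            # hypothetical (winner, loser) pairs
params = mm_pairwise(n_items=3, data=data, alpha=0.01,
                     initial_params=np.zeros(3))   # log-scale warm start
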
25 changes: 7 additions & 18 deletions choix/opt.py
@@ -4,27 +4,21 @@
from scipy.optimize import minimize
from scipy.misc import logsumexp

from .utils import softmax


def _safe_exp(x):
x = min(max(x, -500), 500)
x = min(x, 500)
return math.exp(x)


def _safe_softmax(xs):
xs = np.clip(xs - np.max(xs), -500, 0)
exps = np.exp(xs)
return exps / exps.sum(axis=0)


class PairwiseFcts:

"""Optimization-related methods for pairwise comparison data.
This class provides methods to compute the negative log-likelihood (the
"objective"), its gradient and its Hessian, given model parameters and
pairwise comparison data.
The parameters are assumed to be in the log domain.
"""

def __init__(self, data, penalty):
@@ -65,8 +59,6 @@ class Top1Fcts:
top-1 data.
The class also provides an alternative constructor for ranking data.
The parameters are assumed to be in the log domain.
"""

def __init__(self, data, penalty):
@@ -94,7 +86,7 @@ def gradient(self, params):
grad = 2 * self._penalty * params
for winner, losers in self._data:
idx = np.append(winner, losers)
zs = _safe_softmax(params.take(idx))
zs = softmax(params.take(idx))
grad[idx] += zs
grad[winner] += -1
return grad
@@ -103,16 +95,15 @@ def hessian(self, params):
hess = 2 * self._penalty * np.identity(len(params))
for winner, losers in self._data:
idx = np.append(winner, losers)
zs = _safe_softmax(params.take(idx))
zs = softmax(params.take(idx))
hess[np.ix_(idx, idx)] += -np.outer(zs, zs)
hess[idx,idx] += zs
return hess


def _opt(n_items, fcts, method, initial_params, max_iter, tol):
if initial_params is not None:
x0 = np.log(initial_params)
x0 = x0 - np.mean(x0)
x0 = initial_params
else:
x0 = np.zeros(n_items)
if method == "Newton-CG":
@@ -129,9 +120,7 @@ def _opt(n_items, fcts, method, initial_params, max_iter, tol):
options={"gtol": tol, "maxiter": max_iter})
else:
raise ValueError("method not known")
# Parameters are in the log domain - reparametrize the model.
params = np.exp(res.x)
return params / (params.sum() / n_items)
return res.x


def opt_pairwise(n_items, data, penalty=1e-6, method="Newton-CG",
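
With the final re-exponentiation removed from _opt, the optimization-based estimators also return log-scale parameters, directly comparable with the LSR and MM results above; the quadratic penalty (see the gradient of Top1Fcts) appears to act on these log-scale values. A hypothetical usage sketch:

from choix.opt import opt_pairwise

data = [(0, 1), (1, 2), (2, 0), (0, 2)]    # hypothetical comparisons
params = opt_pairwise(n_items=3, data=data, penalty=1e-4, method="Newton-CG")
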