Kendall tau distance between parameter vectors.

Added a new utility function to compute the number of pairwise disagreements between the rankings induced by two parameter vectors. This complements Spearman's footrule distance.
lucasmaystre · Jul 5, 2017 · 9683242 · 9683242
1 parent 839e86e
commit 9683242
Show file tree

Hide file tree

Showing 4 changed files with 97 additions and 6 deletions.
diff --git a/choix/__init__.py b/choix/__init__.py
@@ -20,6 +20,7 @@
 
 from .utils import (
  footrule_dist,
+ kendalltau_dist,
  log_likelihood_pairwise,
  log_likelihood_rankings,
  log_likelihood_top1,

diff --git a/choix/utils.py b/choix/utils.py
@@ -5,7 +5,7 @@
 import warnings
 
 from scipy.linalg import solve_triangular
-from scipy.stats import rankdata
+from scipy.stats import rankdata, kendalltau
 
 
 SQRT2 = math.sqrt(2.0)
@@ -38,9 +38,10 @@ def footrule_dist(params1, params2=None):
  r"""Compute Spearman's footrule distance between two models.
 
  This function computes Spearman's footrule distance between the rankings
- induced by the two parameter vectors. Let :math:`\sigma_i` be the rank of
- item ``i`` in the first model, and :math:`\tau_i` be its rank in the second
- model. Spearman's footrule distance is defined by
+ induced by two parameter vectors. Let :math:`\sigma_i` be the rank of item
+ ``i`` in the model described by ``params1``, and :math:`\tau_i` be its rank
+ in the model described by ``params2``. Spearman's footrule distance is
+ defined by
 
  .. math::
 
@@ -72,6 +73,52 @@ def footrule_dist(params1, params2=None):
  return np.sum(np.abs(ranks1 - ranks2))
 
 
def kendalltau_dist(params1, params2=None):
    r"""Compute the Kendall tau distance between two models.

    This function computes the Kendall tau distance between the rankings
    induced by two parameter vectors. Let :math:`\sigma_i` be the rank of item
    ``i`` in the model described by ``params1``, and :math:`\tau_i` be its rank
    in the model described by ``params2``. The Kendall tau distance is defined
    as the number of pairwise disagreements between the two rankings, i.e.,

    .. math::

      \sum_{i=1}^N \sum_{j=1}^N
        \mathbf{1} \{ \sigma_i > \sigma_j \wedge \tau_i < \tau_j \}

    If the argument ``params2`` is ``None``, the second model is assumed to
    rank the items by their index: item ``0`` has rank 1, item ``1`` has rank
    2, etc.

    If some values are equal within a parameter vector, all items are given a
    distinct rank, corresponding to the order in which the values occur.

    If there are fewer than two items, there is no pair that could disagree
    and the distance is 0.

    Parameters
    ----------
    params1 : array_like
        Parameters of the first model.
    params2 : array_like, optional
        Parameters of the second model. Must have the same length as
        ``params1``.

    Returns
    -------
    dist : number
        Kendall tau distance, rounded to the nearest whole number.

    Raises
    ------
    AssertionError
        If ``params2`` is given and its length differs from ``params1``.
    """
    assert params2 is None or len(params1) == len(params2)
    n_items = len(params1)
    if n_items < 2:
        # No pairs exist. `kendalltau` yields NaN for such inputs, which
        # `round` cannot convert, so short-circuit instead.
        return 0
    # Converting with `np.asarray` lets plain lists and tuples be negated.
    # We use `-params` because the highest values should be ranked first.
    ranks1 = rankdata(-np.asarray(params1), method="ordinal")
    if params2 is None:
        ranks2 = np.arange(1, n_items + 1, dtype=float)
    else:
        ranks2 = rankdata(-np.asarray(params2), method="ordinal")
    # Ordinal ranks contain no ties, so tau = (concordant - discordant) /
    # n_pairs; solving for the number of discordant pairs gives the distance.
    tau, _ = kendalltau(ranks1, ranks2)
    n_pairs = n_items * (n_items - 1) / 2
    return round((n_pairs - n_pairs * tau) / 2)
+
+
 def log_likelihood_pairwise(data, params):
  """Compute the log-likelihood of model parameters."""
  loglik = 0

diff --git a/docs/api.rst b/docs/api.rst
@@ -9,9 +9,16 @@ Functions that :ref:`generate parameters and data <generators>`.
  choix.generate_params
  choix.generate_pairwise
  choix.generate_rankings
- choix.footrule_dist
  choix.compare
 
+Functions that :ref:`compute distances between models <distances>`.
+
+.. autosummary::
+ :nosignatures:
+
+ choix.footrule_dist
+ choix.kendalltau_dist
+
 Functions that :ref:`process pairwise comparisons <process-pairwise>`.
 
 .. autosummary::
@@ -64,10 +71,18 @@ Generators
 .. autofunction:: choix.generate_params
 .. autofunction:: choix.generate_pairwise
 .. autofunction:: choix.generate_rankings
-.. autofunction:: choix.footrule_dist
 .. autofunction:: choix.compare
 
 
+.. _distances:
+
+Distances
+---------
+
+.. autofunction:: choix.footrule_dist
+.. autofunction:: choix.kendalltau_dist
+
+
 .. _process-pairwise:
 
 Processing pairwise comparisons

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -36,6 +36,34 @@ def test_footrule_dist_default():
  assert footrule_dist(params3) == 25.0
 
 
def test_kendalltau_dist_error():
    """Parameter vectors of different lengths trigger an AssertionError."""
    three_items = np.arange(3, dtype=float)
    four_items = np.arange(4, dtype=float)
    with pytest.raises(AssertionError):
        kendalltau_dist(three_items, four_items)
+
+
def test_kendalltau_dist_simple_cases():
    """Check the distance on three small, hand-verifiable parameter vectors."""
    vec_a = np.array([+1.0, -1.2, +0.0])
    vec_b = np.array([+1.5, -0.2, -0.2])
    vec_c = np.array([-1.0, +1.2, +0.0])
    # A ranking never disagrees with itself.
    for vec in (vec_a, vec_b, vec_c):
        assert kendalltau_dist(vec, vec) == 0.0
    # (first model, second model, expected number of disagreeing pairs)
    expected = (
        (vec_a, vec_b, 1.0),
        (vec_a, vec_c, 3.0),
        (vec_b, vec_c, 2.0),
    )
    for first, second, dist in expected:
        assert kendalltau_dist(first, second) == dist
+
+
def test_kendalltau_dist_default():
    """Check the distance when ``params2`` is omitted."""
    n_items = 10
    # Increasing parameters: the expected distance is the total pair count.
    ascending = np.arange(0, n_items)
    assert kendalltau_dist(ascending) == (n_items * (n_items - 1)) / 2
    # Decreasing parameters: the expected distance is zero.
    descending = np.arange(0, -n_items, -1)
    assert kendalltau_dist(descending) == 0
    # This is a deceptive case, the ties just happen to be resolved correctly.
    all_equal = np.ones(n_items)
    assert kendalltau_dist(all_equal) == 0
+
+
 def test_log_likelihood_pairwise():
  data1 = ((0,1),)
  data2 = ((0,1), (1,0))