Skip to content

Commit

Permalink
Kendall tau distance between parameter vectors.
Browse files Browse the repository at this point in the history
Added a new utility function to compute the number of pairwise disagreements
between the rankings induced by two parameter vectors. This complements
Spearman's footrule distance.
  • Loading branch information
lucasmaystre committed Jul 5, 2017
1 parent 839e86e commit 9683242
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 6 deletions.
1 change: 1 addition & 0 deletions choix/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from .utils import (
footrule_dist,
kendalltau_dist,
log_likelihood_pairwise,
log_likelihood_rankings,
log_likelihood_top1,
Expand Down
55 changes: 51 additions & 4 deletions choix/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import warnings

from scipy.linalg import solve_triangular
from scipy.stats import rankdata
from scipy.stats import rankdata, kendalltau


SQRT2 = math.sqrt(2.0)
Expand Down Expand Up @@ -38,9 +38,10 @@ def footrule_dist(params1, params2=None):
r"""Compute Spearman's footrule distance between two models.
This function computes Spearman's footrule distance between the rankings
induced by the two parameter vectors. Let :math:`\sigma_i` be the rank of
item ``i`` in the first model, and :math:`\tau_i` be its rank in the second
model. Spearman's footrule distance is defined by
induced by two parameter vectors. Let :math:`\sigma_i` be the rank of item
``i`` in the model described by ``params1``, and :math:`\tau_i` be its rank
in the model described by ``params2``. Spearman's footrule distance is
defined by
.. math::
Expand Down Expand Up @@ -72,6 +73,52 @@ def footrule_dist(params1, params2=None):
return np.sum(np.abs(ranks1 - ranks2))


def kendalltau_dist(params1, params2=None):
    r"""Compute the Kendall tau distance between two models.

    This function computes the Kendall tau distance between the rankings
    induced by two parameter vectors. Let :math:`\sigma_i` be the rank of item
    ``i`` in the model described by ``params1``, and :math:`\tau_i` be its rank
    in the model described by ``params2``. The Kendall tau distance is defined
    as the number of pairwise disagreements between the two rankings, i.e.,

    .. math::

      \sum_{i=1}^N \sum_{j=1}^N
        \mathbf{1} \{ \sigma_i > \sigma_j \wedge \tau_i < \tau_j \}

    If the argument ``params2`` is ``None``, the second model is assumed to
    rank the items by their index: item ``0`` has rank 1, item ``1`` has rank
    2, etc.

    If some values are equal within a parameter vector, all items are given a
    distinct rank, corresponding to the order in which the values occur.

    If there are fewer than two items, no pair can disagree and the distance
    is ``0``.

    Parameters
    ----------
    params1 : array_like
        Parameters of the first model.
    params2 : array_like, optional
        Parameters of the second model.

    Returns
    -------
    dist : number
        Kendall tau distance, i.e., a whole-valued count of discordant pairs.

    Raises
    ------
    AssertionError
        If ``params2`` is given and its length differs from that of
        ``params1``.
    """
    assert params2 is None or len(params1) == len(params2)
    n_items = len(params1)
    if n_items < 2:
        # Kendall's tau is undefined (NaN) when there is no pair of items to
        # compare; the number of disagreements is trivially zero.
        return 0
    # We use `-params` because the highest values should be ranked first.
    # `np.asarray` makes the negation work for lists and tuples as well.
    ranks1 = rankdata(-np.asarray(params1), method="ordinal")
    if params2 is None:
        ranks2 = np.arange(1, n_items + 1, dtype=float)
    else:
        ranks2 = rankdata(-np.asarray(params2), method="ordinal")
    tau, _ = kendalltau(ranks1, ranks2)
    n_pairs = n_items * (n_items - 1) / 2
    # Ordinal ranks contain no ties, so tau = (concordant - discordant) /
    # n_pairs, which gives discordant = n_pairs * (1 - tau) / 2. Rounding
    # removes the floating-point error introduced by going through tau.
    return round((n_pairs - n_pairs * tau) / 2)


def log_likelihood_pairwise(data, params):
"""Compute the log-likelihood of model parameters."""
loglik = 0
Expand Down
19 changes: 17 additions & 2 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@ Functions that :ref:`generate parameters and data <generators>`.
choix.generate_params
choix.generate_pairwise
choix.generate_rankings
choix.footrule_dist
choix.compare

Functions that :ref:`compute distances between models <distances>`.

.. autosummary::
:nosignatures:

choix.footrule_dist
choix.kendalltau_dist

Functions that :ref:`process pairwise comparisons <process-pairwise>`.

.. autosummary::
Expand Down Expand Up @@ -64,10 +71,18 @@ Generators
.. autofunction:: choix.generate_params
.. autofunction:: choix.generate_pairwise
.. autofunction:: choix.generate_rankings
.. autofunction:: choix.footrule_dist
.. autofunction:: choix.compare


.. _distances:

Distances
---------

.. autofunction:: choix.footrule_dist
.. autofunction:: choix.kendalltau_dist


.. _process-pairwise:

Processing pairwise comparisons
Expand Down
28 changes: 28 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,34 @@ def test_footrule_dist_default():
assert footrule_dist(params3) == 25.0


def test_kendalltau_dist_error():
    """Parameter vectors of different lengths must be rejected."""
    three_items = np.arange(3, dtype=float)
    four_items = np.arange(4, dtype=float)
    # The length check in `kendalltau_dist` is an `assert` statement.
    with pytest.raises(AssertionError):
        kendalltau_dist(three_items, four_items)


def test_kendalltau_dist_simple_cases():
    """Check hand-computed distances between small parameter vectors."""
    vec_a = np.array([+1.0, -1.2, +0.0])
    vec_b = np.array([+1.5, -0.2, -0.2])
    vec_c = np.array([-1.0, +1.2, +0.0])
    # A ranking never disagrees with itself.
    for vec in (vec_a, vec_b, vec_c):
        assert kendalltau_dist(vec, vec) == 0.0
    # Each entry is (first vector, second vector, expected disagreements).
    expected = (
        (vec_a, vec_b, 1.0),
        (vec_a, vec_c, 3.0),
        (vec_b, vec_c, 2.0),
    )
    for first, second, dist in expected:
        assert kendalltau_dist(first, second) == dist


def test_kendalltau_dist_default():
    """Check the distance to the default (index-ordered) ranking."""
    n_items = 10
    # Increasing parameters: every pair disagrees with the index ordering.
    increasing = np.arange(0, n_items)
    assert kendalltau_dist(increasing) == (10 * 9) / 2
    # Decreasing parameters: identical to the index ordering.
    decreasing = np.arange(0, -n_items, -1)
    assert kendalltau_dist(decreasing) == 0
    # This is a deceptive case, the ties just happen to be resolved correctly.
    all_tied = np.ones(n_items)
    assert kendalltau_dist(all_tied) == 0


def test_log_likelihood_pairwise():
data1 = ((0,1),)
data2 = ((0,1), (1,0))
Expand Down

0 comments on commit 9683242

Please sign in to comment.