Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make docs clear about FR corrected stat #336

Merged
merged 3 commits into from
Oct 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 52 additions & 36 deletions hyppo/independence/friedman_rafsky.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
import random
from typing import NamedTuple

from numba import jit
import numpy as np

from .base import IndependenceTest, IndependenceTestOutput
from .base import IndependenceTest
from ..tools import perm_test


class FRTestOutput(NamedTuple):
    """
    Result triple returned by ``FriedmanRafsky.test``.

    Attributes
    ----------
    stat : float
        The corrected Friedman Rafsky statistic, i.e. the uncorrected
        statistic standardized by the mean and standard deviation of the
        permutation null distribution.
    pvalue : float
        The Friedman Rafsky p-value obtained from the permutation test.
    uncor_stat : float
        The uncorrected Friedman Rafsky statistic.
    """

    stat: float
    pvalue: float
    # A scalar statistic, not a mapping: the annotation used to read ``dict``.
    uncor_stat: float


class FriedmanRafsky(IndependenceTest):
r"""
Friedman-Rafsky (FR) test statistic and p-value.
Expand Down Expand Up @@ -35,41 +43,19 @@ class are removed. The number of independent graphs is then summed to determine

The p-value and null distribution for the corrected statistic are calculated via
a permutation test using :meth:`hyppo.tools.perm_test`.

References
----------
.. footbibliography::
"""

def __init__(self, **kwargs):
    """Forward all keyword arguments to the ``IndependenceTest`` initializer."""
    IndependenceTest.__init__(self, **kwargs)

def _num_runs(self, labels, MST_connections):
r"""
Helper function to determine number of independent
'runs' from MST connections.

Parameters
----------
labels : ndarry of float
Lables corresponding to respective classes of samples.
MST_connections: list of int
List containing pairs of points connected in final MST.

Returns
-------
run_count : int
Number of runs after severing all such edges with nodes of
differing class labels.
"""
run_count = 1

for x in MST_connections:
if labels[x[0]] != labels[x[1]]:
run_count += 1

return run_count

def statistic(self, x, y):
r"""
Helper function that calculates the Friedman Rafsky test statistic.
Helper function that calculates the Friedman Rafsky test statistic.

Parameters
----------
Expand All @@ -83,13 +69,14 @@ def statistic(self, x, y):
Returns
-------
stat : float
The computed Friedman Rafsky statistic. A value between ``2`` and ``n``.
The computed (uncorrected) Friedman Rafsky statistic. A value between
``2`` and ``n``.
"""
x = np.transpose(x)
labels = np.transpose(y)

MST_connections = MST(x, labels)
stat = self._num_runs(labels, MST_connections)
stat = _num_runs(labels, MST_connections)

return stat

Expand All @@ -102,7 +89,7 @@ def test(
random_state=None,
):
r"""
Calculates the Friedman Rafsky test statistic and p-value.
Calculates the Friedman Rafsky test statistic and p-value.

Parameters
----------
Expand All @@ -125,12 +112,13 @@ def test(
Returns
-------
stat : float
The computed Friedman Rafsky statistic.
The computed (corrected) Friedman Rafsky statistic.
pvalue : float
The computed Friedman Rafsky p-value.
uncor_stat : float
The computed (uncorrected) Friedman Rafsky statistic.
"""

stat, pvalue, null_dist = perm_test(
uncor_stat, pvalue, null_dist = perm_test(
self.statistic,
x,
y,
Expand All @@ -139,10 +127,38 @@ def test(
is_distsim=False,
random_state=random_state,
)
stat = (stat - np.mean(null_dist)) / np.std(null_dist)
self.uncor_stat = uncor_stat
stat = (uncor_stat - np.mean(null_dist)) / np.std(null_dist)
self.stat = stat

return IndependenceTestOutput(stat, pvalue)
return FRTestOutput(stat, pvalue, uncor_stat)


def _num_runs(labels, MST_connections):
r"""
Helper function to determine number of independent
'runs' from MST connections.

Parameters
----------
labels : ndarry of float
Lables corresponding to respective classes of samples.
MST_connections: list of int
List containing pairs of points connected in final MST.

Returns
-------
run_count : int
Number of runs after severing all such edges with nodes of
differing class labels.
"""
run_count = 1

for x in MST_connections:
if labels[x[0]] != labels[x[1]]:
run_count += 1

return run_count


@jit(nopython=True, cache=True) # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions hyppo/independence/mgc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .base import IndependenceTest


class MGCestOutput(NamedTuple):
class MGCTestOutput(NamedTuple):
stat: float
pvalue: float
mgc_dict: dict
Expand Down Expand Up @@ -259,4 +259,4 @@ def test(self, x, y, reps=1000, workers=1, random_state=None):
)
self.mgc_dict = mgc_dict

return MGCestOutput(stat, pvalue, mgc_dict)
return MGCTestOutput(stat, pvalue, mgc_dict)
6 changes: 3 additions & 3 deletions hyppo/independence/tests/test_friedman_rafsky.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_linear_oned(self, n, num_runs, obs_stat, obs_pvalue):
num_rows, num_cols = x.shape
y = np.random.choice([0, 1], num_rows, p=[0.5, 0.5])
y = np.transpose(y)
stat1, pvalue1 = FriedmanRafsky().test(x, y)
stat1, pvalue1, _ = FriedmanRafsky().test(x, y)
stat2 = FriedmanRafsky().statistic(x, y)

assert_almost_equal(stat1, obs_stat, decimal=2)
Expand All @@ -30,8 +30,8 @@ def test_rep(self, n):
num_rows, num_cols = x.shape
y = np.random.choice([0, 1], num_rows, p=[0.5, 0.5])
y = np.transpose(y)
stat1, pvalue1 = FriedmanRafsky().test(x, y, random_state=2)
stat2, pvalue2 = FriedmanRafsky().test(x, y, random_state=2)
stat1, pvalue1, _ = FriedmanRafsky().test(x, y, random_state=2)
stat2, pvalue2, _ = FriedmanRafsky().test(x, y, random_state=2)

assert stat1 == stat2
assert pvalue1 == pvalue2