Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FCIT #315

Merged
merged 59 commits into from
May 13, 2022
Merged

FCIT #315

Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
992a2b4
Create smoothCF.py
MatthewZhao26 Oct 21, 2021
c4b8b6c
Added mean_embedding
MatthewZhao26 Oct 21, 2021
54bed36
updated inheritance from IndependenceTest
MatthewZhao26 Oct 28, 2021
6dd8f4d
Updated SmoothCF and ME tests to match author code output
MatthewZhao26 Nov 3, 2021
4b25431
Updated SmoothCF and ME inheritance, started editing tutorial
MatthewZhao26 Nov 11, 2021
beb4ecb
Updated documentation
MatthewZhao26 Nov 18, 2021
1f2ad30
Enabled numba, added docstrings
MatthewZhao26 Nov 30, 2021
ca8bdd1
Minor change
MatthewZhao26 Nov 30, 2021
1927935
Merge branch 'main' into fast_tstest
PSSF23 Dec 2, 2021
a789fa8
Response to initial PR comments
MatthewZhao26 Dec 10, 2021
0d57d2a
Merge branch 'fast_tstest' of https://github.com/MatthewZhao26/hyppo …
MatthewZhao26 Dec 10, 2021
37b859e
Added random state to SmoothCF
MatthewZhao26 Dec 12, 2021
5e076d6
Merge branch 'dev' into fast_tstest
sampan501 Dec 12, 2021
e5d9cbf
ran black
MatthewZhao26 Dec 13, 2021
f35f7b3
ran black again
MatthewZhao26 Dec 13, 2021
84f75c6
remove blank
MatthewZhao26 Dec 13, 2021
1594f6d
quotes
MatthewZhao26 Dec 13, 2021
bbfa96a
Changed example
MatthewZhao26 Dec 13, 2021
398a408
black
MatthewZhao26 Dec 13, 2021
e0659a4
deleted
MatthewZhao26 Dec 13, 2021
002deef
black + more changes
MatthewZhao26 Dec 13, 2021
fa4ad44
random_state added to ME
MatthewZhao26 Dec 13, 2021
58352a3
coverage
MatthewZhao26 Dec 13, 2021
2f021c7
tests
MatthewZhao26 Dec 13, 2021
daf006a
black
MatthewZhao26 Dec 13, 2021
27638af
more tests
MatthewZhao26 Dec 13, 2021
e7df8ae
tests
MatthewZhao26 Dec 13, 2021
70e1b24
reformatting
MatthewZhao26 Dec 13, 2021
c685597
reformat
MatthewZhao26 Dec 13, 2021
c2348c0
reformat
MatthewZhao26 Dec 13, 2021
63a67a1
Merge branch 'dev' into fast_tstest
sampan501 Dec 13, 2021
797a62f
added journal to citation (Fast 2-sample)
MatthewZhao26 Dec 13, 2021
1127f71
Merge branch 'fast_tstest' of https://github.com/MatthewZhao26/hyppo …
MatthewZhao26 Dec 13, 2021
bf0d7d3
corrected docstring math format
MatthewZhao26 Dec 13, 2021
1ec4696
formatting update, helper docstring update
MatthewZhao26 Dec 15, 2021
e7c56d0
Merge branch 'dev' into fast_tstest
sampan501 Dec 17, 2021
8c0c98d
Formatting changes, renaming of helpers, moved random_state
MatthewZhao26 Dec 17, 2021
5d52c84
Merge branch 'fast_tstest' of https://github.com/MatthewZhao26/hyppo …
MatthewZhao26 Dec 17, 2021
3adbf35
fix tabbing issue in ksample
MatthewZhao26 Dec 17, 2021
ea2a1f6
added __init__ variable explanantions to tutorial
MatthewZhao26 Dec 18, 2021
db653fe
minor formatting change to tutorial
MatthewZhao26 Dec 18, 2021
cfb6dcb
fix tutorial rendering issue
sampan501 Dec 20, 2021
dd9587c
Create FCIT.py
MatthewZhao26 Apr 7, 2022
b850800
Creating new conditional independence module
MatthewZhao26 Apr 11, 2022
7b8ceb7
Update cond'l ind module + bib
MatthewZhao26 Apr 12, 2022
71001f6
updated index.rst
MatthewZhao26 Apr 13, 2022
358a459
updated config.yml
MatthewZhao26 Apr 13, 2022
6acfd56
added tests, updated hyppo init
MatthewZhao26 Apr 18, 2022
19174a0
updated tests FCIT
MatthewZhao26 Apr 18, 2022
6c57668
Merge branch 'dev' into fast_tstest
MatthewZhao26 Apr 21, 2022
6b5e095
Update FCIT.py
MatthewZhao26 Apr 21, 2022
2ef7143
added FCIT documentation
MatthewZhao26 May 5, 2022
e8b719f
Merge branch 'dev' into pr/315
sampan501 May 5, 2022
a6f4d6b
black update, documentation update FCIT
MatthewZhao26 May 11, 2022
bff8d53
Merge branch 'fast_tstest' of https://github.com/MatthewZhao26/hyppo …
MatthewZhao26 May 11, 2022
f92697d
added tutorial
MatthewZhao26 May 13, 2022
d04742c
Update conditional.py
MatthewZhao26 May 13, 2022
dff6ceb
rst edits
MatthewZhao26 May 13, 2022
0dd9df6
minor doc changes
MatthewZhao26 May 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added mean_embedding
  • Loading branch information
MatthewZhao26 committed Oct 21, 2021
commit c4b8b6c6864c3171dd11a4903dfb7f8bef90dd2d
2 changes: 2 additions & 0 deletions 1.4.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Collecting scipy
Using cached https://files.pythonhosted.org/packages/47/33/a24aec22b7be7fdb10ec117a95e1e4099890d8bbc6646902f443fc7719d1/scipy-1.7.1.tar.gz
76 changes: 76 additions & 0 deletions hyppo/independence/mean_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import numpy
from warnings import warn
from numpy import mean, transpose, cov, shape
from numpy.linalg import linalg, LinAlgError, solve
from scipy.stats import chi2


class MeanEmbeddingTest:
    """
    Two-sample test built on differences of Gaussian mean embeddings.

    Both samples are multiplied by ``scale``; the embedding
    ``exp(-||x - scale * t||^2 / 2)`` is evaluated at randomly drawn
    locations ``t`` and the per-sample differences between the two data sets
    are summarised by a chi-squared p-value.

    The differences are taken row by row, so ``data_x`` and ``data_y`` must
    have the same shape.
    """

    def __init__(self, data_x, data_y, scale=1, number_of_random_frequencies=5):
        """

        :param data_x: first sample, 2-D array-like (samples x dimensions)
        :param data_y: second sample, same shape as ``data_x``
        :param scale: factor applied to both samples and to the test locations
        :param number_of_random_frequencies: number of random test locations
        """
        # asarray first: ``scale * list`` would repeat (or reject) a Python
        # list instead of scaling its values.
        self.data_x = scale * numpy.asarray(data_x)
        self.data_y = scale * numpy.asarray(data_y)
        self.number_of_frequencies = number_of_random_frequencies
        self.scale = scale

    def mahalanobis_distance(self, difference, num_random_features):
        """

        :param difference: 2-D array (samples x features) of per-sample
            differences between the two embeddings
        :param num_random_features: degrees of freedom of the chi-squared
            reference distribution
        :return: the p-value ``chi2.sf(n * mu^T Sigma^{-1} mu, num_random_features)``
            where ``mu`` and ``Sigma`` are the mean and covariance of
            ``difference``
        :raises LinAlgError: if the covariance matrix is singular (a warning
            is emitted before re-raising)
        """
        num_samples, _ = shape(difference)
        # cov() collapses a single feature to a 0-d array; promote it to 1x1
        # so the one-feature case goes through the same solve() as the rest.
        sigma = numpy.atleast_2d(cov(transpose(difference)))
        mu = mean(difference, 0)

        try:
            # solve() detects singularity itself, so no separate inv() probe.
            weighted_mu = solve(sigma, mu)
        except LinAlgError:
            warn('covariance matrix is singular. Pvalue cannot be computed')
            raise

        stat = num_samples * mu.dot(weighted_mu)
        return chi2.sf(stat, num_random_features)

    def get_estimate(self, data, point):
        """

        :param data: 2-D array of (already scaled) samples
        :param point: one test location, 1-D array of length ``dimensions``
        :return: 1-D array with ``exp(-||row - scale * point||^2 / 2)`` per row
        """
        z = data - self.scale * point
        z2 = numpy.linalg.norm(z, axis=1) ** 2
        return numpy.exp(-z2 / 2.0)

    def get_difference(self, point):
        """

        :param point: one test location
        :return: per-sample difference between the embeddings of ``data_x``
            and ``data_y`` at ``point``
        """
        estimate_x = self.get_estimate(self.data_x, point)
        estimate_y = self.get_estimate(self.data_y, point)
        return estimate_x - estimate_y

    def vector_of_differences(self, dim):
        """

        :param dim: dimensionality of the data (columns of ``data_x``)
        :return: array of shape (samples, number_of_frequencies) holding the
            embedding differences at freshly drawn standard-normal locations
        """
        # Locations come from NumPy's global random state.
        points = numpy.random.randn(self.number_of_frequencies, dim)
        a = [self.get_difference(point) for point in points]
        return numpy.array(a).T

    def compute_pvalue(self):
        """
        :return: p-value of the mean-embedding test
        """
        _, dimension = numpy.shape(self.data_x)
        obs = self.vector_of_differences(dimension)

        return self.mahalanobis_distance(obs, self.number_of_frequencies)
90 changes: 90 additions & 0 deletions hyppo/independence/smoothCF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import numpy
from warnings import warn
from numpy import mean, transpose, cov, shape, concatenate, newaxis, exp, sin, cos
from numpy.linalg import linalg, LinAlgError, solve
from scipy.stats import chi2


class SmoothCFTest:
    """
    Two-sample test built on differences of smoothed characteristic functions.

    Both samples are multiplied by ``scale``. Each sample is projected onto
    random frequencies, the sine and cosine of the projections are weighted
    by ``exp(-||x||^2 / 2)``, and the per-sample differences between the two
    data sets are summarised by a chi-squared p-value.

    The differences are taken row by row, so ``data_x`` and ``data_y`` must
    have the same number of samples as well as the same dimensionality.
    """

    def __init__(self, data_x, data_y, scale=2.0, num_random_features=5, frequency_generator=None):
        """

        :param data_x: first sample, 2-D array-like (samples x dimensions)
        :param data_y: second sample, same dimensionality as ``data_x``
        :param scale: factor applied to both samples
        :param num_random_features: number of random frequencies to draw
        :param frequency_generator: accepted for interface compatibility;
            not used by this implementation
        """
        # asarray first: ``scale * list`` would fail (or repeat) on a Python
        # list instead of scaling its values.
        self.data_x = scale * numpy.asarray(data_x)
        self.data_y = scale * numpy.asarray(data_y)
        self.num_random_features = num_random_features
        _, dimension_x = numpy.shape(self.data_x)
        _, dimension_y = numpy.shape(self.data_y)
        # Kept as an assert so the exception type callers may see is unchanged.
        assert dimension_x == dimension_y, 'data_x and data_y must have the same number of columns'
        self.random_frequencies = self._gen_random(dimension_x)

    def _gen_random(self, dimension):
        """
        :param dimension: dimensionality of the data (number of columns)
        :return: standard-normal array of shape (dimension, num_random_features),
            drawn from NumPy's global random state
        """
        return numpy.random.randn(dimension, self.num_random_features)

    def smooth(self, data):
        """
        :param data: X or Y, 2-D array (samples x dimensions)
        :return: column vector (samples x 1) of weights ``exp(-||row||^2 / 2)``
        """
        w = numpy.linalg.norm(data, axis=1)
        w = exp(-w ** 2 / 2)
        return w[:, newaxis]

    def smooth_cf(self, data, w, random_frequencies):
        """
        :param data: X or Y, 2-D array (samples x dimensions)
        :param w: smoothing weights of shape (samples, 1), as returned by
            :meth:`smooth`
        :param random_frequencies: array of shape (dimensions, features)
        :return: array of shape (samples, 2 * features) holding the weighted
            sines followed by the weighted cosines of the projections
        """
        n, _ = data.shape
        _, d = random_frequencies.shape
        mat = data.dot(random_frequencies)
        arr = concatenate((sin(mat) * w, cos(mat) * w), 1)
        n1, d1 = arr.shape
        # Internal sanity check on the shapes produced above.
        assert n1 == n and d1 == 2 * d and w.shape == (n, 1)
        return arr

    def smooth_difference(self, random_frequencies, X, Y):
        """
        :param random_frequencies: array of shape (dimensions, features)
        :param X: X data
        :param Y: Y data
        :return: per-sample difference between the smoothed characteristic
            function features of ``X`` and ``Y``
        """
        x_features = self.smooth_cf(X, self.smooth(X), random_frequencies)
        y_features = self.smooth_cf(Y, self.smooth(Y), random_frequencies)
        return x_features - y_features

    def mahalanobis_distance(self, difference, num_random_features):
        """

        :param difference: 2-D array (samples x features) of per-sample
            differences between two smooth characteristic functions
        :param num_random_features: degrees of freedom of the chi-squared
            reference distribution
        :return: the p-value ``chi2.sf(n * mu^T Sigma^{-1} mu, num_random_features)``
            where ``mu`` and ``Sigma`` are the mean and covariance of
            ``difference``
        :raises LinAlgError: if the covariance matrix is singular (a warning
            is emitted before re-raising)
        """
        num_samples, _ = shape(difference)
        # cov() collapses a single feature to a 0-d array; promote it to 1x1
        # so the one-feature case goes through the same solve() as the rest.
        sigma = numpy.atleast_2d(cov(transpose(difference)))
        mu = mean(difference, 0)

        try:
            # solve() detects singularity itself, so no separate inv() probe.
            weighted_mu = solve(sigma, mu)
        except LinAlgError:
            warn('covariance matrix is singular. Pvalue cannot be computed')
            raise

        stat = num_samples * mu.dot(weighted_mu)
        return chi2.sf(stat, num_random_features)

    def compute_pvalue(self):
        """
        :return: p-value of the smoothCF test; the sine and cosine parts give
            ``2 * num_random_features`` degrees of freedom
        """
        difference = self.smooth_difference(self.random_frequencies, self.data_x, self.data_y)
        return self.mahalanobis_distance(difference, 2 * self.num_random_features)
42 changes: 0 additions & 42 deletions hyppo/independence/tests/smoothCF.py

This file was deleted.

26 changes: 26 additions & 0 deletions hyppo/independence/tests/test_smoothCF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import numpy as np
import pytest
from numpy.testing import assert_almost_equal, assert_approx_equal

from ...tools import linear, multimodal_independence, power, spiral
from .. import MGC


class SmoothCF:
    """Test validity of SmoothCF test statistic

    NOTE(review): no test methods are defined here yet, and the class name
    has no ``Test`` prefix -- confirm pytest is configured to collect it.
    """



class SmoothCFError:
    """Type I error check on one-dimensional independent data.

    NOTE(review): the case below estimates power for "MGC", not for the
    SmoothCF test this module is named after, and the class name has no
    ``Test`` prefix -- confirm both are intended.
    """

    def test_oned(self):
        # Fixed seed so the estimated power is reproducible.
        np.random.seed(123456789)
        est_power = power(
            "MGC",
            sim_type="indep",
            sim="multimodal_independence",
            n=50,
            p=1,
            alpha=0.05,
        )

        # The estimated power is expected to match alpha to two decimals.
        assert_almost_equal(est_power, 0.05, decimal=2)