-
Notifications
You must be signed in to change notification settings - Fork 10
/
RIT.R
103 lines (76 loc) · 2.59 KB
/
RIT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#' Tests whether x and y are unconditionally independent
#' @param x Random variable x.
#' @param y Random variable y.
#' @param approx Method for approximating the null distribution. Options include:
#' "lpd4," the Lindsay-Pilla-Basak method (default),
#' "gamma" for the Satterthwaite-Welch method,
#' "hbe" for the Hall-Buckley-Eagleson method,
#' "chi2" for a normalized chi-squared statistic,
#' "perm" for permutation testing (warning: this one is slow but recommended for small samples generally <500 )
#' @param seed The seed for controlling random number generation. Use if you want to replicate results exactly. Default is NULL.
#' @return A list containing the p-value \code{p} and statistic \code{Sta}
#' @examples
#' RIT(rnorm(1000),rnorm(1000));
#'
#' x=rnorm(1000);
#' y=(x+rnorm(1000))^2;
#' RIT(x,y);
RIT <- function(x,y,approx="lpd4",seed=NULL){
if (sd(x)==0 | sd(y)==0){
out=list(p=1,Sta=0,w=w,b=b);
return(out$p)
}
x=matrix2(x);
y=matrix2(y);
r=nrow(x);
if (r>500){
r1=500
} else {r1=r;}
x=normalize(x);
y=normalize(y);
four_x = random_fourier_features(x,num_f=5,sigma=median(c(t(dist(x[1:r1,])))), seed = seed );
four_y = random_fourier_features(y,num_f=5,sigma=median(c(t(dist(y[1:r1,])))), seed = seed );
f_x=normalize(four_x$feat); #stabilizes computations
f_y=normalize(four_y$feat); #stabilizes computations
Cxy=cov(f_x,f_y);
Sta = r*sum(Cxy^2);
#approximate null distributions
if (approx == "perm"){
nperm =1000;
Stas = c();
for (ps in 1:nperm){
perm = sample(1:r,r);
Sta_p = Sta_perm(f_x[perm,],f_y,r)
Stas = c(Stas, Sta_p);
}
p = 1-(sum(Sta >= Stas)/length(Stas));
} else{
res_x = f_x-repmat(t(matrix(colMeans(f_x))),r,1);
res_y = f_y-repmat(t(matrix(colMeans(f_y))),r,1);
d =expand.grid(1:ncol(f_x),1:ncol(f_y));
res = res_x[,d[,1]]*res_y[,d[,2]];
Cov = 1/r * (t(res)%*%res);
if (approx == "chi2"){
i_Cov = ginv(Cov)
Sta = r * (c(Cxy)%*% i_Cov %*% c(Cxy) );
p = 1-pchisq(Sta, length(c(Cxy)));
} else{
eig_d = eigen(Cov);
eig_d$values=eig_d$values[eig_d$values>0];
if (approx == "gamma"){
p=1-sw(eig_d$values,Sta);
} else if (approx == "hbe") {
p=1-hbe(eig_d$values,Sta);
} else if (approx == "lpd4"){
eig_d_values=eig_d$values;
p=try(1-lpb4(eig_d_values,Sta), silent=TRUE);
if (!is.numeric(p) | is.nan(p)){
p=1-hbe(eig_d$values,Sta);
}
}
}
}
if (p<0) p=0;
out=list(p=p,Sta=Sta);
return(out)
}