forked from gengxiangc/ResTL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ResTL.py
242 lines (214 loc) · 8.86 KB
/
ResTL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# -*- coding: utf-8 -*-
"""
Created on 2019
@author: Tangmei [email protected]
Hypothesis transfer learning based on fuzzy residual
"""
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.kernel_ridge import KernelRidge
import skfuzzy as fuzz
def scatter_within(data):
    """
    Return the within-cluster scatter of a set of samples.

    The scatter is the sum, over all rows of ``data``, of the Euclidean
    distance between the row and the column-wise mean of ``data``.
    :param data: 2-D array, n_Samples * n_Features
    :return: summed distance of every sample to the mean sample;
             0.0 when ``data`` has no rows
    """
    if data.shape[0] == 0:
        # An empty cluster has no scatter. Returning early also avoids
        # NumPy's "Mean of empty slice" warning and the NaN mean it produces.
        return 0.0
    # Broadcasting subtracts the mean row from every sample.
    centered = data - data.mean(axis=0)
    # Accumulate row by row so the result matches a plain running sum.
    return sum(np.linalg.norm(row) for row in centered)
def adjustScale(kernel_width, min_kernel, max_kernel):
    """
    Linearly rescale an array of values into [min_kernel, max_kernel].

    The smallest entry of ``kernel_width`` is mapped to ``min_kernel`` and the
    largest to ``max_kernel``; the minimum and maximum are taken over the
    whole array, not per column.
    :param kernel_width: array of values to rescale
    :param min_kernel: lower bound of the output range
    :param max_kernel: upper bound of the output range
    :return: array with the same shape as ``kernel_width``; when every entry
             is identical the result is filled with ``min_kernel``
    """
    values = np.asarray(kernel_width, dtype=float)
    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Constant input (e.g. a single cluster): (x - min) / (max - min)
        # would be 0 / 0 = NaN, so fall back to the lower bound instead.
        return np.full_like(values, min_kernel)
    unit = (values - lowest) / span
    return min_kernel + unit * (max_kernel - min_kernel)
def kernel_width(C, data, min_kernel, max_kernel):
    """
    Compute a width for every (center, feature) pair.

    For center k and feature j the raw width is the Euclidean norm of
    ``data[:, j] - C[k, j]``. Each feature column is then divided by its sum
    over all centers, and the whole matrix is rescaled with ``adjustScale``
    into [min_kernel, max_kernel].
    :param C: cluster centers, n_Clusters * n_Features
    :param data: n_Samples * n_Features
    :param min_kernel: lower bound of the returned widths
    :param max_kernel: upper bound of the returned widths
    :return: widths, n_Clusters * n_Features
    """
    n_centers = C.shape[0]
    n_features = data.shape[1]
    widths = np.zeros((n_centers, n_features))
    for k in range(n_centers):
        # Column-wise norm: one distance per feature for center k.
        widths[k, :] = np.linalg.norm(data - C[k, :n_features], axis=0)
    column_totals = widths.sum(axis=0)
    # A feature on which every sample coincides with every center has only
    # zero distances; dividing by its zero total would turn the column into
    # NaN, so leave such a column at zero.
    column_totals[column_totals == 0] = 1.0
    widths = widths / column_totals
    return adjustScale(widths, min_kernel, max_kernel)
def gene_ante_deter(data, n_cluster):
    """
    Deterministically partition the data and derive rule centers and widths.

    The data still to be refined is split on its highest-variance feature at
    that feature's mean. The half with the smaller within-cluster scatter
    (see ``scatter_within``) is kept as a finished cluster and the other half
    is split again. After ``n_cluster - 1`` splits the remaining half becomes
    the last cluster, so the clusters together cover every sample.

    When both halves have the same scatter, the first half (values <= mean)
    is the one split further and the second half is kept, so the two halves
    are never confused with each other.
    :param data: n_Samples * n_Features
    :param n_cluster: number of clusters, at least 1
    :return: C: cluster centers (mean of each cluster), n_Clusters * n_Features;
             b: widths from ``kernel_width`` computed on the full ``data``
             and scaled into [1, 10], n_Clusters * n_Features
    :raises ValueError: if ``n_cluster`` is smaller than 1 or the data cannot
             be split into ``n_cluster`` non-empty clusters
    """
    if n_cluster < 1:
        raise ValueError("n_cluster must be at least 1, got %r" % (n_cluster,))
    too_few = ("cannot split %d samples into %d non-empty clusters"
               % (data.shape[0], n_cluster))

    clusters = []
    # Keep ``data`` untouched: the widths below need the full dataset.
    remaining = data
    for _ in range(n_cluster - 1):
        if remaining.shape[0] < 2:
            raise ValueError(too_few)
        split_dim = np.argmax(remaining.var(axis=0))
        column = remaining[:, split_dim]
        threshold = np.mean(column)
        halves = (remaining[column <= threshold, :],
                  remaining[column > threshold, :])
        if halves[0].shape[0] == 0 or halves[1].shape[0] == 0:
            # Happens when the remaining samples are all identical; an empty
            # cluster would give a NaN center.
            raise ValueError(too_few)
        scatters = [scatter_within(half) for half in halves]
        widest = int(np.argmax(scatters))
        # ``1 - widest`` (not argmin) so a tie still picks the other half.
        clusters.append(halves[1 - widest])
        remaining = halves[widest]
    clusters.append(remaining)

    C = np.vstack([cluster.mean(axis=0) for cluster in clusters])
    b = kernel_width(C, data, 1, 10)
    return C, b
def fcm(data, n_cluster):
    """
    Cluster the data with fuzzy c-means and measure the spread of each cluster.
    :param data: n_Samples * n_Features
    :param n_cluster: number of centers
    :return: centers: cluster centers, n_Clusters * n_Features;
             delta: for each cluster, the membership-weighted mean squared
             deviation of every feature from the cluster center,
             n_Clusters * n_Features
    """
    _, n_features = data.shape
    # cmeans receives the transposed data (features on rows); only the
    # centers and the membership matrix of its seven results are used.
    centers, mem, _, _, _, _, _ = fuzz.cmeans(
        data.T, n_cluster, 2.0, error=1e-5, maxiter=200)

    delta = np.zeros([n_cluster, n_features])
    for idx in range(n_cluster):
        # Membership of every sample in cluster idx, as a column so that it
        # scales each sample's squared deviations.
        membership = mem[idx, :]
        squared_dev = (data - centers[idx, :]) ** 2
        weighted_sum = np.sum(squared_dev * membership.reshape(-1, 1), axis=0)
        delta[idx, :] = weighted_sum / np.sum(membership)
    return centers, delta
def get_x_p(data, centers, delta):
    """
    -- as equation (11) in the paper
    Compute normalised Gaussian firing strengths and the firing-weighted data.
    :param data: n_Samples * n_Features
    :param centers: data centers, n_Clusters * n_Features
    :param delta: width of each feature for each cluster, n_Clusters * n_Features
    :return: data_fs: for every cluster, the data with an appended column of
             ones multiplied by that cluster's firing strength, laid out
             cluster after cluster, n_Samples * [n_Clusters * (n_Features+1)];
             mu_a: firing strengths normalised to sum to 1 per sample,
             n_Samples * n_Clusters
    """
    n_cluster = centers.shape[0]
    n_samples = data.shape[0]

    # Exponent of the Gaussian firing strength of each sample for each rule.
    log_mu = np.zeros([n_samples, n_cluster])
    for i in range(n_cluster):
        log_mu[:, i] = -np.sum((data - centers[i, :]) ** 2 / delta[i, :],
                               axis=1)

    # Shift every row by its largest exponent before exponentiating. The
    # normalised result is mathematically unchanged, but a sample far from all
    # centers no longer underflows to 0 / 0 = NaN.
    shift = np.max(log_mu, axis=1, keepdims=True)
    shift = np.where(np.isfinite(shift), shift, 0.0)
    mu_a = np.exp(log_mu - shift)
    mu_a = mu_a / np.sum(mu_a, axis=1, keepdims=True)

    # Append the bias column, then scale one copy of the data per rule.
    data_1 = np.concatenate((data, np.ones([n_samples, 1])), axis=1)
    weighted = mu_a[:, :, np.newaxis] * data_1[:, np.newaxis, :]
    data_fs = weighted.reshape(n_samples, n_cluster * data_1.shape[1])
    # Any NaN that is left (e.g. from a zero width) is replaced by a small
    # constant, as before.
    data_fs = np.where(np.isnan(data_fs), 1e-5, data_fs)
    return data_fs, mu_a
def model(X_source, X_target, Y_source, Y_target, X_test,
          n_cluster=10, C=0.1, width=1, residual='RD', basemodel='TSK',
          fit_with_target=True):
    """
    Build the prediction model of target domain with the help of source data.

    Parameters
    ----------
    X_source : X of source domain, n_Samples * n_Features
    X_target : X of target domain
    Y_source : Y of source domain, a single output (column vector or 1-D)
    Y_target : Y of target domain, a single output (column vector or 1-D)
    X_test : test data
    n_cluster : int
        Number of clustering centers (fuzzy rules)
    C : float
        Regularization coefficient of the TSK source model
    width : float
        The strength of the RD rules; scales the rule widths
    residual : string, "RD" or "LS"
        The bias can be calculated by residual defuzzification "RD" or by
        the least squares method "LS".
    basemodel : string or estimator, string 'TSK' or model from sklearn
        Source model; anything other than 'TSK' is used through its
        ``fit`` / ``predict`` methods.
    fit_with_target : boolean
        Fit a kernel ridge output model on the label-adapted source data
        plus the target data; otherwise predict with the source model plus
        the residual.

    Returns
    -------
    Y_output : prediction results of test data
    x_each, y_each : 3 * n_cluster arrays for visualization of the rules.
        They are only computed for basemodel == 'TSK' with one input
        feature; in every other case they are filled with ones.

    Raises
    ------
    ValueError
        If ``residual`` is neither "RD" nor "LS".
    """
    if residual not in ('RD', 'LS'):
        raise ValueError("residual must be 'RD' or 'LS', got %r" % (residual,))

    n, d = X_source.shape
    use_tsk = isinstance(basemodel, str) and basemodel == 'TSK'
    # The residual is a column vector, so targets and predictions must be
    # columns too; a 1-D vector would broadcast into an (n, n) matrix.
    Y_source_col = _as_column(Y_source)
    Y_target = _as_column(Y_target)

    # fuzzy partition method
    if d <= 3:  # fcm is good for low dimension
        centers, delta = fcm(X_source, n_cluster)
    else:  # varPart is good for high dimension
        centers, delta = gene_ante_deter(X_source, n_cluster)
    delta = delta * width  # change the rules width

    # compute x_p: as equation (3) in the paper
    X_p_s, _ = get_x_p(X_source, centers, delta)
    X_p_t, W = get_x_p(X_target, centers, delta)  # W as equation (20)
    X_p_test, _ = get_x_p(X_test, centers, delta)

    if use_tsk:
        # compute consequent parameters by LS as equation (5) in the paper
        gram = np.dot(X_p_s.T, X_p_s)
        Ps = np.linalg.pinv(gram + C * np.eye(gram.shape[0])).dot(
            X_p_s.T).dot(Y_source_col)
    else:  # the basemodel from sklearn
        basemodel.fit(X_source, Y_source)

    def source_predict(X, X_p):
        # Prediction of the source model as a column vector.
        if use_tsk:
            return X_p.dot(Ps)
        return _as_column(basemodel.predict(X))

    # generalization errors of the source model on the target data
    E = Y_target - source_predict(X_target, X_p_t)

    if residual == 'LS':
        # equation (19): bias Z by regularized least squares.
        # For those rules with no target data, the corresponding z_k will be
        # controlled close to 0 with the help of the regularization term C2.
        gram_w = np.dot(W.T, W)
        C2 = 1e-1
        Z = np.linalg.pinv(gram_w + C2 * np.eye(gram_w.shape[0])).dot(
            W.T).dot(E)
    else:
        # equation (23): bias Z by residual defuzzification, i.e. the error
        # averaged with each rule's firing strengths normalised per rule.
        rule_mass = np.sum(W, axis=0)
        # A rule that no target sample fires has zero mass; dividing by 1
        # keeps its bias at 0 instead of NaN.
        safe_mass = np.where(rule_mass > 0, rule_mass, 1.0)
        Z = np.sum((W / safe_mass) * E, axis=0)
    Z = np.asarray(Z, dtype=float).reshape(-1)

    # the residual on X: Z only enters the bias term of every rule
    bias_rows = np.arange(n_cluster) * (d + 1) + d
    Pe = np.zeros((n_cluster * (d + 1), 1))
    Pe[bias_rows, 0] = Z

    # compute the Y_output
    if fit_with_target:
        # fit an output model with new data, any model is ok, here is KRR
        outmodel = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1), cv=5,
                                param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3],
                                            "gamma": np.logspace(-2, 2, 5)})
        Ys_new = source_predict(X_source, X_p_s) + X_p_s.dot(Pe)
        X_all = np.vstack((X_source, X_target))
        Y_all = np.vstack((Ys_new, Y_target))
        outmodel.fit(X_all, Y_all)
        Y_output = outmodel.predict(X_test)
    else:
        Y_output = source_predict(X_test, X_p_test) + X_p_test.dot(Pe)

    # return each rule, only when basemodel == 'TSK'
    x_each = np.ones((3, n_cluster))
    y_each = np.ones((3, n_cluster))
    # Each rule is drawn as a line segment over a single input axis, so this
    # is only defined for one feature; with more features the placeholders
    # above are returned.
    if use_tsk and d == 1:
        adapted = Ps.copy()
        adapted[bias_rows, 0] += Z
        dis = (np.max(X_source) - np.min(X_source)) / (n_cluster * 5)
        for k in range(n_cluster):
            center = centers[k, 0]
            x_each[:, k] = [center - width * dis, center, center + width * dis]
            x_e = np.column_stack((x_each[:, k], np.ones(3)))
            P_ = adapted[k * (d + 1): (k + 1) * (d + 1)]
            y_each[:, k] = x_e.dot(P_).reshape(-1)
    return Y_output, x_each, y_each


def _as_column(values):
    """Return ``values`` as an array, promoting a 1-D vector to one column."""
    arr = np.asarray(values)
    if arr.ndim == 1:
        return arr[:, np.newaxis]
    return arr