forked from talkingwallace/NGCF-pytorch
Commit 5347a5a (0 parents; initial commit)
Showing 43 changed files with 803,579 additions and 0 deletions.
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
Some generated files and large diffs are not rendered by default.
@@ -0,0 +1,28 @@
import torch
import dgl


def build_karate_club_graph():
    g = dgl.DGLGraph()
    # add 34 nodes to the graph; nodes are labeled 0..33
    g.add_nodes(34)
    # all 78 edges as a list of tuples
    edge_list = [(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2),
                 (4, 0), (5, 0), (6, 0), (6, 4), (6, 5), (7, 0), (7, 1),
                 (7, 2), (7, 3), (8, 0), (8, 2), (9, 2), (10, 0), (10, 4),
                 (10, 5), (11, 0), (12, 0), (12, 3), (13, 0), (13, 1), (13, 2),
                 (13, 3), (16, 5), (16, 6), (17, 0), (17, 1), (19, 0), (19, 1),
                 (21, 0), (21, 1), (25, 23), (25, 24), (27, 2), (27, 23),
                 (27, 24), (28, 2), (29, 23), (29, 26), (30, 1), (30, 8),
                 (31, 0), (31, 24), (31, 25), (31, 28), (32, 2), (32, 8),
                 (32, 14), (32, 15), (32, 18), (32, 20), (32, 22), (32, 23),
                 (32, 29), (32, 30), (32, 31), (33, 8), (33, 9), (33, 13),
                 (33, 14), (33, 15), (33, 18), (33, 19), (33, 20), (33, 22),
                 (33, 23), (33, 26), (33, 27), (33, 28), (33, 29), (33, 30),
                 (33, 31), (33, 32)]
    # add edges as two lists of nodes: src and dst
    src, dst = tuple(zip(*edge_list))
    g.add_edges(src, dst)
    # edges are directional in DGL; add the reverse edges to make them bidirectional
    g.add_edges(dst, src)

    return g
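A minimal usage sketch for the helper above, assuming an older DGL release (around 0.4.x) in which a mutable dgl.DGLGraph() supports add_nodes/add_edges as used here; newer DGL versions construct graphs with dgl.graph() instead:

    g = build_karate_club_graph()
    print('nodes:', g.number_of_nodes())  # 34
    print('edges:', g.number_of_edges())  # 156 = 78 undirected edges x 2 directions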
@@ -0,0 +1,179 @@
import torch
import torch.nn as nn
from torch.nn import Module
from scipy.sparse import coo_matrix
from scipy.sparse import vstack
from scipy import sparse
import numpy as np


# RMSE benchmarks (50 training epochs each):
# SVD:  dim = 50                                          RMSE = 0.931
# GNCF: dim = 64, layers = [64,64,64], nn = [128,64,32]   RMSE = 0.916 / 0.914
# NCF:  dim = 64, nn = [128,54,32]                        RMSE = 0.928

class SVD(Module):

    def __init__(self, userNum, itemNum, dim):
        super(SVD, self).__init__()
        self.uEmbd = nn.Embedding(userNum, dim)
        self.iEmbd = nn.Embedding(itemNum, dim)
        self.uBias = nn.Embedding(userNum, 1)
        self.iBias = nn.Embedding(itemNum, 1)
        self.overAllBias = nn.Parameter(torch.Tensor([0]))

    def forward(self, userIdx, itemIdx):
        uembd = self.uEmbd(userIdx)
        iembd = self.iEmbd(itemIdx)
        ubias = self.uBias(userIdx)
        ibias = self.iBias(itemIdx)

        biases = ubias + ibias + self.overAllBias
        prediction = torch.sum(torch.mul(uembd, iembd), dim=1) + biases.flatten()

        return prediction

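# The class above is classic biased matrix factorization (the "SVD" model of the
# Netflix-Prize literature):  r_hat(u, i) = mu + b_u + b_i + <p_u, q_i>,
# with mu = overAllBias and b_u, b_i the per-user / per-item bias embeddings.
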
class NCF(Module):

    def __init__(self, userNum, itemNum, dim, layers=[128, 64, 32, 8]):
        super(NCF, self).__init__()
        self.uEmbd = nn.Embedding(userNum, dim)
        self.iEmbd = nn.Embedding(itemNum, dim)
        self.fc_layers = torch.nn.ModuleList()
        self.finalLayer = torch.nn.Linear(layers[-1], 1)

        # layers[0] must equal 2*dim, the width of the concatenated user/item embedding
        for From, To in zip(layers[:-1], layers[1:]):
            self.fc_layers.append(nn.Linear(From, To))

    def forward(self, userIdx, itemIdx):
        uembd = self.uEmbd(userIdx)
        iembd = self.iEmbd(itemIdx)
        embd = torch.cat([uembd, iembd], dim=1)
        x = embd
        for l in self.fc_layers:
            x = l(x)
            x = nn.ReLU()(x)

        prediction = self.finalLayer(x)
        return prediction.flatten()

class GNNLayer(Module):

    def __init__(self, inF, outF):
        super(GNNLayer, self).__init__()
        self.inF = inF
        self.outF = outF
        self.linear = torch.nn.Linear(in_features=inF, out_features=outF)
        self.interActTransform = torch.nn.Linear(in_features=inF, out_features=outF)

    def forward(self, laplacianMat, selfLoop, features):
        # for GCF the adjacency matrix is an (N+M) x (N+M) matrix over users and items;
        # laplacianMat is its symmetric normalization L = D^-1/2 A D^-1/2 (the "Laplacian")
        L1 = laplacianMat + selfLoop
        L2 = laplacianMat.cuda()  # note: hard-coded CUDA; this layer assumes a GPU
        L1 = L1.cuda()
        inter_feature = torch.mul(features, features)

        inter_part1 = self.linear(torch.sparse.mm(L1, features))
        inter_part2 = self.interActTransform(torch.sparse.mm(L2, inter_feature))

        return inter_part1 + inter_part2

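# In matrix form, one propagation step above computes (a sketch of the layer's math;
# note the NGCF paper's interaction term multiplies L E element-wise with E, whereas
# this code uses the self-interaction L (E * E)):
#   E_out = (L + I) E W_1 + L (E * E) W_2
# where E stacks the user and item features, * is the element-wise product, and
# W_1, W_2 are the two linear transforms. The nonlinearity is applied in GCF.forward.
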
class GCF(Module):

    def __init__(self, userNum, itemNum, rt, embedSize=100, layers=[100, 80, 50], useCuda=True):
        super(GCF, self).__init__()
        self.useCuda = useCuda
        self.userNum = userNum
        self.itemNum = itemNum
        self.uEmbd = nn.Embedding(userNum, embedSize)
        self.iEmbd = nn.Embedding(itemNum, embedSize)
        self.GNNlayers = torch.nn.ModuleList()
        self.LaplacianMat = self.buildLaplacianMat(rt)  # sparse format
        self.leakyRelu = nn.LeakyReLU()
        self.selfLoop = self.getSparseEye(self.userNum + self.itemNum)

        # forward() concatenates the initial embedding with every GNN layer's output,
        # for the user and the item each, so the MLP input width is sum(layers) * 2
        # (this requires embedSize == layers[0], as in the defaults above)
        self.transForm1 = nn.Linear(in_features=sum(layers) * 2, out_features=64)
        self.transForm2 = nn.Linear(in_features=64, out_features=32)
        self.transForm3 = nn.Linear(in_features=32, out_features=1)

        for From, To in zip(layers[:-1], layers[1:]):
            self.GNNlayers.append(GNNLayer(From, To))

    def getSparseEye(self, num):
        # sparse identity matrix, used as the self-loop term I
        i = torch.LongTensor([list(range(num)), list(range(num))])
        val = torch.FloatTensor([1] * num)
        return torch.sparse.FloatTensor(i, val)

    def buildLaplacianMat(self, rt):
        # build the (userNum+itemNum) square adjacency A = [[0, R], [R^T, 0]] from
        # the rating matrix R, then normalize it symmetrically: L = D^-1/2 A D^-1/2
        rt_item = rt['itemId'] + self.userNum
        uiMat = coo_matrix((rt['rating'], (rt['userId'], rt['itemId'])))

        uiMat_upperPart = coo_matrix((rt['rating'], (rt['userId'], rt_item)))
        uiMat = uiMat.transpose()
        uiMat.resize((self.itemNum, self.userNum + self.itemNum))

        A = sparse.vstack([uiMat_upperPart, uiMat])
        sumArr = (A > 0).sum(axis=1)  # node degrees
        diag = list(np.array(sumArr.flatten())[0])
        diag = np.power(diag, -0.5)   # yields inf for isolated (degree-0) nodes
        D = sparse.diags(diag)
        L = D * A * D
        L = sparse.coo_matrix(L)
        row = L.row
        col = L.col
        i = torch.LongTensor([row, col])
        data = torch.FloatTensor(L.data)
        SparseL = torch.sparse.FloatTensor(i, data)
        return SparseL

    def getFeatureMat(self):
        uidx = torch.LongTensor(list(range(self.userNum)))
        iidx = torch.LongTensor(list(range(self.itemNum)))
        if self.useCuda:
            uidx = uidx.cuda()
            iidx = iidx.cuda()

        userEmbd = self.uEmbd(uidx)
        itemEmbd = self.iEmbd(iidx)
        features = torch.cat([userEmbd, itemEmbd], dim=0)
        return features

    def forward(self, userIdx, itemIdx):
        # item indices are offset by userNum to address the stacked user/item feature matrix
        itemIdx = itemIdx + self.userNum
        userIdx = list(userIdx.cpu().data)
        itemIdx = list(itemIdx.cpu().data)
        # gcf data propagation: keep the embeddings from every layer
        features = self.getFeatureMat()
        finalEmbd = features.clone()
        for gnn in self.GNNlayers:
            features = gnn(self.LaplacianMat, self.selfLoop, features)
            features = nn.ReLU()(features)  # note: self.leakyRelu is defined but a plain ReLU is applied
            finalEmbd = torch.cat([finalEmbd, features.clone()], dim=1)

        userEmbd = finalEmbd[userIdx]
        itemEmbd = finalEmbd[itemIdx]
        embd = torch.cat([userEmbd, itemEmbd], dim=1)

        embd = nn.ReLU()(self.transForm1(embd))
        embd = self.transForm2(embd)
        embd = self.transForm3(embd)
        prediction = embd.flatten()

        return prediction

if __name__ == '__main__':
    from toyDataset.loaddata import load100KRatings

    rt = load100KRatings()
    userNum = rt['userId'].max()
    itemNum = rt['itemId'].max()

    # shift ids to be 0-based
    rt['userId'] = rt['userId'] - 1
    rt['itemId'] = rt['itemId'] - 1
    gcf = GCF(userNum, itemNum, rt)
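A small sanity-check sketch for buildLaplacianMat. The toy DataFrame below is a hypothetical stand-in for the MovieLens frame returned by load100KRatings; with useCuda=False it only exercises the sparse construction on CPU (GNNLayer.forward itself still calls .cuda()):

    import pandas as pd

    # 3 users, 2 items, 0-based ids as in the __main__ block above
    toy = pd.DataFrame({'userId': [0, 1, 2, 0],
                        'itemId': [0, 1, 1, 1],
                        'rating': [5, 3, 4, 1]})
    model = GCF(userNum=3, itemNum=2, rt=toy, embedSize=8, layers=[8, 8], useCuda=False)
    L = model.LaplacianMat
    print(L.shape)       # torch.Size([5, 5]): a (userNum+itemNum) square matrix
    print(L.to_dense())  # upper-right block holds the degree-normalized ratings; lower-left is its transpose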
Empty file.
3 binary files not shown.
@@ -0,0 +1,15 @@
from torch.utils.data import Dataset


class ML1K(Dataset):

    def __init__(self, rt):
        super(ML1K, self).__init__()
        self.uId = list(rt['userId'])
        self.iId = list(rt['itemId'])
        self.rt = list(rt['rating'])

    def __len__(self):
        return len(self.uId)

    def __getitem__(self, item):
        return (self.uId[item], self.iId[item], self.rt[item])
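A minimal usage sketch (rt is any DataFrame with userId, itemId, and rating columns, such as the one returned by load100KRatings; the default collate stacks each field of the triple into a tensor):

    from torch.utils.data import DataLoader

    ds = ML1K(rt)
    u, i, r = ds[0]                             # one (userId, itemId, rating) triple
    loader = DataLoader(ds, batch_size=4, shuffle=True)
    users, items, ratings = next(iter(loader))  # three tensors of length 4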
@@ -0,0 +1,80 @@
import torch
from torch import nn as nn
from toyDataset.loaddata import load100KRatings
from scipy.sparse import coo_matrix
import pandas as pd
import numpy as np
from numpy import diag
from GraphNCF.GCFmodel import GCF
from torch.utils.data import DataLoader
from GraphNCF.dataPreprosessing import ML1K
from torch.utils.data import random_split
from torch.optim import Adam
from torch.nn import MSELoss
from GraphNCF.GCFmodel import SVD
from GraphNCF.GCFmodel import NCF

rt = load100KRatings()
userNum = rt['userId'].max()
itemNum = rt['itemId'].max()

# shift ids to be 0-based
rt['userId'] = rt['userId'] - 1
rt['itemId'] = rt['itemId'] - 1
# Dense-matrix construction of the normalized Laplacian, kept for reference
# (GCF.buildLaplacianMat computes the same thing sparsely):
# rtIt = rt['itemId'] + userNum
# uiMat = coo_matrix((rt['rating'],(rt['userId'],rt['itemId'])))
# uiMat_upperPart = coo_matrix((rt['rating'],(rt['userId'],rtIt)))
# uiMat = uiMat.transpose()
# uiMat.resize((itemNum,userNum+itemNum))
# uiMat = uiMat.todense()
# uiMat_t = uiMat.transpose()
# zeros1 = np.zeros((userNum,userNum))
# zeros2 = np.zeros((itemNum,itemNum))
#
# p1 = np.concatenate([zeros1,uiMat],axis=1)
# p2 = np.concatenate([uiMat_t,zeros2],axis=1)
# mat = np.concatenate([p1,p2])
#
# count = (mat > 0)+0
# diagval = np.array(count.sum(axis=0))[0]
# diagval = np.power(diagval,(-1/2))
# D_ = diag(diagval)
#
# L = np.dot(np.dot(D_,mat),D_)
para = {
    'epoch': 60,
    'lr': 0.01,
    'batch_size': 2048,
    'train': 0.8,
}

ds = ML1K(rt)
trainLen = int(para['train'] * len(ds))
train, test = random_split(ds, [trainLen, len(ds) - trainLen])
dl = DataLoader(train, batch_size=para['batch_size'], shuffle=True, pin_memory=True)

model = GCF(userNum, itemNum, rt, 80, layers=[80, 80]).cuda()
# model = SVD(userNum, itemNum, 50).cuda()
# model = NCF(userNum, itemNum, 64, layers=[128, 64, 32, 16, 8]).cuda()
optim = Adam(model.parameters(), lr=para['lr'], weight_decay=0.001)
lossfn = MSELoss()

for i in range(para['epoch']):

    for id, batch in enumerate(dl):
        print('epoch:', i, ' batch:', id)
        optim.zero_grad()
        prediction = model(batch[0].cuda(), batch[1].cuda())
        loss = lossfn(batch[2].float().cuda(), prediction)
        loss.backward()
        optim.step()
        print(loss)

testdl = DataLoader(test, batch_size=len(test))
for data in testdl:
    prediction = model(data[0].cuda(), data[1].cuda())

    loss = lossfn(data[2].float().cuda(), prediction)
    print(loss)
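The benchmark comments in GCFmodel.py quote RMSE, while the loop above prints test MSE; a minimal sketch of the conversion (wrapping evaluation in torch.no_grad() so no autograd graph is built):

    model.eval()
    with torch.no_grad():
        for data in testdl:
            prediction = model(data[0].cuda(), data[1].cuda())
            mse = lossfn(data[2].float().cuda(), prediction)
            print('test RMSE:', torch.sqrt(mse).item())  # comparable to the ~0.91 quoted in GCFmodel.py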
@@ -0,0 +1 @@
# GraphNN