
Commit 5347a5a: Initial commit
talkingwallace committed May 23, 2019 (0 parents)
Showing 43 changed files with 803,579 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
11 changes: 11 additions & 0 deletions .idea/GraphNN.iml

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions .idea/encodings.xml

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default.

342 changes: 342 additions & 0 deletions .idea/workspace.xml

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions DGLtest.py
@@ -0,0 +1,28 @@
import torch
import dgl

def build_karate_club_graph():
    g = dgl.DGLGraph()
    # add 34 nodes to the graph; nodes are labeled 0 to 33
    g.add_nodes(34)
    # all 78 edges of the karate club network as a list of (src, dst) tuples
    edge_list = [(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2),
                 (4, 0), (5, 0), (6, 0), (6, 4), (6, 5), (7, 0), (7, 1),
                 (7, 2), (7, 3), (8, 0), (8, 2), (9, 2), (10, 0), (10, 4),
                 (10, 5), (11, 0), (12, 0), (12, 3), (13, 0), (13, 1), (13, 2),
                 (13, 3), (16, 5), (16, 6), (17, 0), (17, 1), (19, 0), (19, 1),
                 (21, 0), (21, 1), (25, 23), (25, 24), (27, 2), (27, 23),
                 (27, 24), (28, 2), (29, 23), (29, 26), (30, 1), (30, 8),
                 (31, 0), (31, 24), (31, 25), (31, 28), (32, 2), (32, 8),
                 (32, 14), (32, 15), (32, 18), (32, 20), (32, 22), (32, 23),
                 (32, 29), (32, 30), (32, 31), (33, 8), (33, 9), (33, 13),
                 (33, 14), (33, 15), (33, 18), (33, 19), (33, 20), (33, 22),
                 (33, 23), (33, 26), (33, 27), (33, 28), (33, 29), (33, 30),
                 (33, 31), (33, 32)]
    # unzip the tuples into two lists of nodes: src and dst
    src, dst = tuple(zip(*edge_list))
    g.add_edges(src, dst)
    # edges are directional in DGL; add the reverse edges to make them bidirectional
    g.add_edges(dst, src)

    return g
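
For reference, a minimal sketch of how this builder might be exercised; the number_of_nodes/number_of_edges calls assume the DGL 0.x API that was current at the time of this commit:

if __name__ == '__main__':
    G = build_karate_club_graph()
    # 34 nodes; the 78 undirected edges are stored as 156 directed edges
    print('nodes:', G.number_of_nodes())
    print('edges:', G.number_of_edges())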
179 changes: 179 additions & 0 deletions GraphNCF/GCFmodel.py
@@ -0,0 +1,179 @@
import torch
import torch.nn as nn
from torch.nn import Module
from scipy.sparse import coo_matrix
from scipy.sparse import vstack
from scipy import sparse
import numpy as np

# RMSE results:
# SVD  dim = 50, 50 epochs: RMSE = 0.931
# GNCF dim = 64, layers = [64,64,64], nn = [128,64,32], 50 epochs: RMSE = 0.916 / 0.914
# NCF  dim = 64, nn = [128,54,32], 50 epochs: RMSE = 0.928

class SVD(Module):
    # biased matrix factorization: prediction = u . i + userBias + itemBias + globalBias

    def __init__(self, userNum, itemNum, dim):
        super(SVD, self).__init__()
        self.uEmbd = nn.Embedding(userNum, dim)
        self.iEmbd = nn.Embedding(itemNum, dim)
        self.uBias = nn.Embedding(userNum, 1)
        self.iBias = nn.Embedding(itemNum, 1)
        self.overAllBias = nn.Parameter(torch.Tensor([0]))

    def forward(self, userIdx, itemIdx):
        uembd = self.uEmbd(userIdx)
        iembd = self.iEmbd(itemIdx)
        ubias = self.uBias(userIdx)
        ibias = self.iBias(itemIdx)

        biases = ubias + ibias + self.overAllBias
        # dot product of user and item embeddings plus the bias terms
        prediction = torch.sum(torch.mul(uembd, iembd), dim=1) + biases.flatten()

        return prediction

class NCF(Module):

    def __init__(self, userNum, itemNum, dim, layers=[128, 64, 32, 8]):
        super(NCF, self).__init__()
        self.uEmbd = nn.Embedding(userNum, dim)
        self.iEmbd = nn.Embedding(itemNum, dim)
        self.fc_layers = torch.nn.ModuleList()
        self.finalLayer = torch.nn.Linear(layers[-1], 1)

        # layers[0] must equal 2*dim, since user and item embeddings are concatenated
        for From, To in zip(layers[:-1], layers[1:]):
            self.fc_layers.append(nn.Linear(From, To))

    def forward(self, userIdx, itemIdx):
        uembd = self.uEmbd(userIdx)
        iembd = self.iEmbd(itemIdx)
        embd = torch.cat([uembd, iembd], dim=1)
        x = embd
        for l in self.fc_layers:
            x = l(x)
            x = nn.ReLU()(x)

        prediction = self.finalLayer(x)
        return prediction.flatten()


class GNNLayer(Module):

    def __init__(self, inF, outF):
        super(GNNLayer, self).__init__()
        self.inF = inF
        self.outF = outF
        self.linear = torch.nn.Linear(in_features=inF, out_features=outF)
        self.interActTransform = torch.nn.Linear(in_features=inF, out_features=outF)

    def forward(self, laplacianMat, selfLoop, features):
        # for GCF the adjacency matrix is an (N+M) x (N+M) matrix over users and items
        # laplacianMat is the normalized Laplacian L = D^-1/2 A D^-1/2
        L1 = laplacianMat + selfLoop
        L2 = laplacianMat.cuda()
        L1 = L1.cuda()
        # element-wise self-interaction of the propagated features
        inter_feature = torch.mul(features, features)

        inter_part1 = self.linear(torch.sparse.mm(L1, features))
        inter_part2 = self.interActTransform(torch.sparse.mm(L2, inter_feature))

        return inter_part1 + inter_part2

class GCF(Module):

    def __init__(self, userNum, itemNum, rt, embedSize=100, layers=[100, 80, 50], useCuda=True):
        # embedSize must equal layers[0]: the raw embeddings feed the first GNN layer
        super(GCF, self).__init__()
        self.useCuda = useCuda
        self.userNum = userNum
        self.itemNum = itemNum
        self.uEmbd = nn.Embedding(userNum, embedSize)
        self.iEmbd = nn.Embedding(itemNum, embedSize)
        self.GNNlayers = torch.nn.ModuleList()
        self.LaplacianMat = self.buildLaplacianMat(rt)  # sparse format
        self.leakyRelu = nn.LeakyReLU()
        self.selfLoop = self.getSparseEye(self.userNum + self.itemNum)

        # the final embedding concatenates the output of every propagation step for
        # both the user and the item, hence sum(layers) * 2 input features
        self.transForm1 = nn.Linear(in_features=sum(layers)*2, out_features=64)
        self.transForm2 = nn.Linear(in_features=64, out_features=32)
        self.transForm3 = nn.Linear(in_features=32, out_features=1)

        for From, To in zip(layers[:-1], layers[1:]):
            self.GNNlayers.append(GNNLayer(From, To))

    def getSparseEye(self, num):
        # num x num sparse identity matrix, used as the self-loop term
        i = torch.LongTensor([[k for k in range(0, num)], [j for j in range(0, num)]])
        val = torch.FloatTensor([1] * num)
        return torch.sparse.FloatTensor(i, val)

    def buildLaplacianMat(self, rt):
        # build L = D^-1/2 A D^-1/2 for the (N+M) x (N+M) bipartite adjacency
        # A = [[0, R], [R^T, 0]], where R is the userNum x itemNum rating matrix
        rt_item = rt['itemId'] + self.userNum
        uiMat = coo_matrix((rt['rating'], (rt['userId'], rt['itemId'])))

        uiMat_upperPart = coo_matrix((rt['rating'], (rt['userId'], rt_item)))
        uiMat = uiMat.transpose()
        uiMat.resize((self.itemNum, self.userNum + self.itemNum))

        A = sparse.vstack([uiMat_upperPart, uiMat])
        sumArr = (A > 0).sum(axis=1)  # node degrees (number of interactions)
        diag = list(np.array(sumArr.flatten())[0])
        diag = np.power(diag, -0.5)
        D = sparse.diags(diag)
        L = D * A * D
        L = sparse.coo_matrix(L)
        row = L.row
        col = L.col
        i = torch.LongTensor([row, col])
        data = torch.FloatTensor(L.data)
        SparseL = torch.sparse.FloatTensor(i, data)
        return SparseL

    def getFeatureMat(self):
        uidx = torch.LongTensor([i for i in range(self.userNum)])
        iidx = torch.LongTensor([i for i in range(self.itemNum)])
        if self.useCuda:
            uidx = uidx.cuda()
            iidx = iidx.cuda()

        # stack user embeddings on top of item embeddings: (N+M) x embedSize
        userEmbd = self.uEmbd(uidx)
        itemEmbd = self.iEmbd(iidx)
        features = torch.cat([userEmbd, itemEmbd], dim=0)
        return features

    def forward(self, userIdx, itemIdx):
        # item rows sit below the user rows in the stacked feature matrix
        itemIdx = itemIdx + self.userNum
        userIdx = list(userIdx.cpu().data)
        itemIdx = list(itemIdx.cpu().data)
        # GCF data propagation: keep the output of every layer
        features = self.getFeatureMat()
        finalEmbd = features.clone()
        for gnn in self.GNNlayers:
            features = gnn(self.LaplacianMat, self.selfLoop, features)
            features = nn.ReLU()(features)
            finalEmbd = torch.cat([finalEmbd, features.clone()], dim=1)

        userEmbd = finalEmbd[userIdx]
        itemEmbd = finalEmbd[itemIdx]
        embd = torch.cat([userEmbd, itemEmbd], dim=1)

        embd = nn.ReLU()(self.transForm1(embd))
        embd = self.transForm2(embd)
        embd = self.transForm3(embd)
        prediction = embd.flatten()

        return prediction

if __name__ == '__main__':
    from toyDataset.loaddata import load100KRatings

    rt = load100KRatings()
    userNum = rt['userId'].max()
    itemNum = rt['itemId'].max()

    # shift the 1-based MovieLens ids to 0-based indices
    rt['userId'] = rt['userId'] - 1
    rt['itemId'] = rt['itemId'] - 1
    gcf = GCF(userNum, itemNum, rt)
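
To make the normalization performed by buildLaplacianMat concrete, here is a small self-contained sketch that applies the same D^-1/2 A D^-1/2 scaling to a hypothetical 2-user, 2-item rating matrix (dense NumPy for readability; the ratings in R are made up):

import numpy as np

R = np.array([[5., 0.],
              [3., 4.]])                 # hypothetical users x items ratings
N, M = R.shape
A = np.block([[np.zeros((N, N)), R],
              [R.T, np.zeros((M, M))]])  # bipartite adjacency, (N+M) x (N+M)
deg = (A > 0).sum(axis=1).astype(float)  # interaction counts, as in the model
D_inv_sqrt = np.diag(np.power(deg, -0.5))
L = D_inv_sqrt @ A @ D_inv_sqrt          # the matrix buildLaplacianMat stores sparsely
print(np.round(L, 3))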
Empty file added GraphNCF/__init__.py
Empty file.
Binary file added GraphNCF/__pycache__/GCFmodel.cpython-37.pyc
Binary file not shown.
Binary file added GraphNCF/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
15 changes: 15 additions & 0 deletions GraphNCF/dataPreprosessing.py
@@ -0,0 +1,15 @@
from torch.utils.data import Dataset

class ML1K(Dataset):
    # thin Dataset wrapper over the (userId, itemId, rating) columns of the ratings frame

    def __init__(self, rt):
        super(ML1K, self).__init__()
        self.uId = list(rt['userId'])
        self.iId = list(rt['itemId'])
        self.rt = list(rt['rating'])

    def __len__(self):
        return len(self.uId)

    def __getitem__(self, item):
        return (self.uId[item], self.iId[item], self.rt[item])
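
A usage sketch for this wrapper, assuming rt is the pandas DataFrame returned by load100KRatings with userId/itemId/rating columns:

from torch.utils.data import DataLoader

ds = ML1K(rt)
dl = DataLoader(ds, batch_size=4, shuffle=True)
users, items, ratings = next(iter(dl))  # three length-4 tensors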
80 changes: 80 additions & 0 deletions GraphNCF/run.py
@@ -0,0 +1,80 @@
import torch
from torch import nn as nn
from toyDataset.loaddata import load100KRatings
from scipy.sparse import coo_matrix
import pandas as pd
import numpy as np
from numpy import diag
from GraphNCF.GCFmodel import GCF
from torch.utils.data import DataLoader
from GraphNCF.dataPreprosessing import ML1K
from torch.utils.data import random_split
from torch.optim import Adam
from torch.nn import MSELoss
from GraphNCF.GCFmodel import SVD
from GraphNCF.GCFmodel import NCF

rt = load100KRatings()
userNum = rt['userId'].max()
itemNum = rt['itemId'].max()

rt['userId'] = rt['userId'] - 1
rt['itemId'] = rt['itemId'] - 1
#
# rtIt = rt['itemId'] + userNum
# uiMat = coo_matrix((rt['rating'],(rt['userId'],rt['itemId'])))
# uiMat_upperPart = coo_matrix((rt['rating'],(rt['userId'],rtIt)))
# uiMat = uiMat.transpose()
# uiMat.resize((itemNum,userNum+itemNum))
# uiMat = uiMat.todense()
# uiMat_t = uiMat.transpose()
# zeros1 = np.zeros((userNum,userNum))
# zeros2 = np.zeros((itemNum,itemNum))
#
# p1 = np.concatenate([zeros1,uiMat],axis=1)
# p2 = np.concatenate([uiMat_t,zeros2],axis=1)
# mat = np.concatenate([p1,p2])
#
# count = (mat > 0)+0
# diagval = np.array(count.sum(axis=0))[0]
# diagval = np.power(diagval,(-1/2))
# D_ = diag(diagval)
#
# L = np.dot(np.dot(D_,mat),D_)
#
para = {
    'epoch': 60,
    'lr': 0.01,
    'batch_size': 2048,
    'train': 0.8
}

ds = ML1K(rt)
trainLen = int(para['train']*len(ds))
train,test = random_split(ds,[trainLen,len(ds)-trainLen])
dl = DataLoader(train,batch_size=para['batch_size'],shuffle=True,pin_memory=True)

model = GCF(userNum, itemNum, rt, 80, layers=[80,80,]).cuda()
# model = SVD(userNum,itemNum,50).cuda()
# model = NCF(userNum,itemNum,64,layers=[128,64,32,16,8]).cuda()
optim = Adam(model.parameters(), lr=para['lr'],weight_decay=0.001)
lossfn = MSELoss()

for i in range(para['epoch']):

    for id, batch in enumerate(dl):
        print('epoch:', i, ' batch:', id)
        optim.zero_grad()
        prediction = model(batch[0].cuda(), batch[1].cuda())
        loss = lossfn(batch[2].float().cuda(), prediction)
        loss.backward()
        optim.step()
        print(loss)


testdl = DataLoader(test, batch_size=len(test),)
for data in testdl:
    prediction = model(data[0].cuda(), data[1].cuda())

    loss = lossfn(data[2].float().cuda(), prediction)
    print(loss)
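
The figures quoted at the top of GCFmodel.py are RMSE, while this test loop prints MSE; assuming those numbers came from this script, the conversion is one line:

rmse = torch.sqrt(loss).item()  # square root of the printed test MSE
print('test RMSE:', rmse)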
1 change: 1 addition & 0 deletions README.md
@@ -0,0 +1 @@
# GraphNN