Skip to content

Commit

Permalink
MusicBase
Browse files Browse the repository at this point in the history
  • Loading branch information
X-XG committed Jan 18, 2022
1 parent 84005b2 commit 20d3ae1
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 0 deletions.
65 changes: 65 additions & 0 deletions exp3/src/MusicBase/MatSim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import numpy as np

# Side length of the square similarity matrix; music IDs are used directly
# as row/column indices into it.
MusicNum = 21602
# Value written onto the diagonal of the similarity matrix by
# MatDiagMinimize().
MIN_NUM = -30000
# Input file: one line per user -- a user ID followed by whitespace-separated
# "music_id,..." fields (only the part before the first comma is read).
data_path = '../../data/DoubanMusic.txt'
# Output directory. NOTE(review): not referenced by the code visible in this
# file -- confirm whether it is still needed.
output_path = '../../output/'


def JaccobiSim(list1:list, list2:list):
    """Return the Jaccard similarity |A & B| / |A | B| of two ID collections.

    Both arguments are treated as sets, so repeated IDs inside either
    list do not distort the result.

    Args:
        list1: First collection of hashable IDs.
        list2: Second collection of hashable IDs.

    Returns:
        A float in [0.0, 1.0]. When both collections are empty the
        similarity is defined as 0.0 instead of raising ZeroDivisionError.
    """
    set1 = set(list1)
    set2 = set(list2)
    # Size the union from the sets themselves: deriving it from the raw list
    # lengths over-counts whenever a list contains duplicates.
    union = len(set1 | set2)
    if union == 0:
        return 0.0
    return len(set1 & set2) / union

def MatSimGen():
    """Build the music-to-music Jaccard similarity matrix and save it.

    Reads ``data_path`` line by line. Each non-blank line is a user ID
    followed by whitespace-separated fields whose part before the first
    comma is a music ID. For every music ID the users that reference it are
    collected, and the similarity of each pair of music IDs is computed with
    ``JaccobiSim`` over those user lists.

    The result is a ``(MusicNum, MusicNum)`` array indexed directly by music
    ID, written to ``'MatJaccobiSim.npy'`` in the current working directory.
    Music IDs must therefore be valid indices for that array.

    NOTE: with the default float64 dtype the dense matrix needs roughly
    ``MusicNum ** 2 * 8`` bytes of memory.
    """
    MapMusicID = {}
    MatSim = np.zeros((MusicNum, MusicNum))

    # `with` guarantees the file is closed even if parsing raises.
    with open(data_path, 'r') as f:
        for line in f:
            temp = line.split()
            if not temp:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            UserID = int(temp[0])
            for pair in temp[1:]:
                MusicID = int(pair.split(',')[0])
                MapMusicID.setdefault(MusicID, []).append(UserID)

    music_ids = list(MapMusicID)
    for num, Music1 in enumerate(music_ids, start=1):
        if num % 10 == 0:
            print(num)
        users1 = MapMusicID[Music1]
        # Jaccard similarity is symmetric, so compute each unordered pair
        # once (including the diagonal) and mirror it into both cells.
        for Music2 in music_ids[num - 1:]:
            sim = JaccobiSim(users1, MapMusicID[Music2])
            MatSim[Music1, Music2] = sim
            MatSim[Music2, Music1] = sim
    np.save('MatJaccobiSim.npy', MatSim)

def MatDiagMinimize():
    """Overwrite the similarity matrix diagonal with ``MIN_NUM``.

    Loads ``'MatJaccobiSim.npy'``, sets entries ``[i, i]`` for every ``i`` in
    ``range(MusicNum)`` to ``MIN_NUM``, and stores the modified matrix as
    ``'MatJaccobiSim_DiagMinimized.npy'``.
    """
    similarity = np.load('MatJaccobiSim.npy')
    # Address all diagonal cells at once via paired integer index arrays.
    diagonal = np.arange(MusicNum)
    similarity[diagonal, diagonal] = MIN_NUM
    np.save('MatJaccobiSim_DiagMinimized.npy', similarity)

if __name__ == "__main__":
    # MatDiagMinimize() loads 'MatJaccobiSim.npy', which MatSimGen() writes;
    # uncomment the next line to (re)generate that file first.
    # MatSimGen()
    MatDiagMinimize()


57 changes: 57 additions & 0 deletions exp3/src/MusicBase/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import numpy as np

# Similarity matrix loaded by main(); indexed as [music_id][music_id].
MatSimPath = 'MatJaccobiSim_DiagMinimized.npy'
# Input file: one line per user -- a user ID followed by whitespace-separated
# "music_id,..." fields (only the part before the first comma is read).
data_path = '../../data/DoubanMusic.txt'
# Output file: one line per user with the recommended music IDs.
result_path = 'MusicBase.txt'
# Number of candidate music IDs scored for every user (IDs 0 .. MusicNum-1).
MusicNum = 21602

def main(CarryOn = False):
    """Write up to 100 recommended music IDs per user to ``result_path``.

    For each user found in ``data_path``, every music ID in
    ``range(MusicNum)`` is scored as the sum of its similarities (taken from
    the matrix at ``MatSimPath``) to the music IDs on that user's line. The
    highest-scoring IDs are written as ``"<user>\\t<id>,<id>,...\\n"``. Ties
    are broken toward the larger music ID, matching a reverse sort of
    ``(score, music_id)`` tuples.

    Args:
        CarryOn: When True, resume a previous run: the lines already present
            in ``result_path`` are counted, that many users (in input order)
            are skipped, and new results are appended. A missing result file
            is treated as an empty one. When False, ``result_path`` is
            overwritten and every user is processed.
    """
    MatSim = np.load(MatSimPath)
    UserMap = {}

    # `with` guarantees the file is closed even if parsing raises.
    with open(data_path, 'r') as f:
        for line in f:
            temp = line.split()
            if not temp:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            UserID = int(temp[0])
            for pair in temp[1:]:
                MusicID = int(pair.split(',')[0])
                UserMap.setdefault(UserID, []).append(MusicID)

    count = 0
    if CarryOn:
        try:
            with open(result_path, 'r') as f:
                count = sum(1 for _ in f)
        except FileNotFoundError:
            # Nothing to resume from yet: start from the first user.
            count = 0
    already_done = count

    candidate_ids = np.arange(MusicNum)
    with open(result_path, 'a' if CarryOn else 'w') as out:
        for position, (UserID, user_music) in enumerate(UserMap.items()):
            # Resume by position in input order rather than by comparing the
            # user ID with the line count, which is only correct when IDs are
            # 0-based, consecutive and sorted.
            if position < already_done:
                continue
            # One row per music ID of the user; summing the rows yields the
            # score of every candidate in a single vectorized step.
            scores = MatSim[user_music, :MusicNum].sum(axis=0)
            # lexsort orders by score, then by ID (ascending); reversing gives
            # descending score with larger IDs first among equal scores.
            ranked = np.lexsort((candidate_ids, scores))[::-1][:100]
            out.write(str(UserID))
            out.write('\t')
            out.write(','.join(str(music_id) for music_id in ranked.tolist()))
            out.write('\n')
            count += 1
            print(count)

if __name__ == "__main__":
    # Continue from an existing result file instead of starting over.
    main(CarryOn=True)

0 comments on commit 20d3ae1

Please sign in to comment.