Skip to content

Commit

Permalink
'删除了预训练模型'
Browse files Browse the repository at this point in the history
  • Loading branch information
Estelle-gqy committed Feb 20, 2023
1 parent 1788a99 commit 18f6c23
Show file tree
Hide file tree
Showing 76 changed files with 1,021,418 additions and 387 deletions.
13 changes: 13 additions & 0 deletions .idea/SELFRec_MHCN.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/other.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

734 changes: 609 additions & 125 deletions .idea/workspace.xml

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions SELFRec.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
from data.loader import FileIO


class SELFRec(object):
    """Load the configured data sets and run the configured recommender.

    The constructor reads the training, test and dev sets (and the social
    data when the configuration provides it); ``execute`` then imports the
    model class named in the configuration and runs it.
    """

    def __init__(self, config):
        """Read every data set referenced by ``config``.

        Args:
            config: configuration object supporting ``config[key]`` lookups
                and ``config.contain(key)``.
        """
        self.social_data = []
        self.feature_data = []
        self.config = config
        self.training_data = FileIO.load_data_set(config['training.set'], config['model.type'])
        self.test_data = FileIO.load_data_set(config['test.set'], config['model.type'])
        self.dev_data = FileIO.load_data_set(config['dev.set'], config['model.type'])

        # Optional inputs are forwarded to the model as keyword arguments.
        self.kwargs = {}
        if config.contain('social.data'):
            social_data = FileIO.load_social_data(self.config['social.data'])
            self.kwargs['social.data'] = social_data
        # if config.contains('feature.data'):
        #     self.social_data = FileIO.loadFeature(config,self.config['feature.data'])
        print('Reading data and preprocessing...')

    def execute(self):
        """Import ``model.<model.type>.<model.name>`` and run the model.

        The class ``<model.name>`` is looked up in that module, instantiated
        with the configuration, the three data sets and the optional keyword
        data, and its ``execute()`` method is called.

        Raises:
            ImportError: if the module cannot be imported or does not define
                a class named ``<model.name>``.
        """
        # Local import keeps this block self-contained.
        import importlib

        model_name = self.config['model.name']
        module_path = 'model.' + self.config['model.type'] + '.' + model_name
        # importlib replaces the former exec()/eval() pair: it does not run
        # configuration strings as code and does not rely on exec() being
        # able to inject names into this function's local namespace.
        module = importlib.import_module(module_path)
        try:
            recommender_cls = getattr(module, model_name)
        except AttributeError as err:
            # Keep the exception type that ``from module import name`` raised.
            raise ImportError(
                'cannot import name %r from %r' % (model_name, module_path)
            ) from err
        recommender = recommender_cls(
            self.config,
            self.training_data,
            self.test_data,
            self.dev_data,
            **self.kwargs
        )
        recommender.execute()
Binary file added __pycache__/SELFRec.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/SELFRec.cpython-37.pyc
Binary file not shown.
Binary file added base/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added base/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file added base/__pycache__/graph_recommender.cpython-37.pyc
Binary file not shown.
Binary file added base/__pycache__/recommender.cpython-36.pyc
Binary file not shown.
Binary file added base/__pycache__/recommender.cpython-37.pyc
Binary file not shown.
Binary file added base/__pycache__/tf_interface.cpython-37.pyc
Binary file not shown.
146 changes: 135 additions & 11 deletions base/graph_recommender.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import tensorflow as tf
from base.recommender import Recommender
from data.ui_graph import Interaction
from util.algorithm import find_k_largest
Expand All @@ -9,9 +10,9 @@


class GraphRecommender(Recommender):
def __init__(self, conf, training_set, test_set, **kwargs):
super(GraphRecommender, self).__init__(conf, training_set, test_set, **kwargs)
self.data = Interaction(conf, training_set, test_set)
def __init__(self, conf, training_set, test_set, dev_data, **kwargs):
super(GraphRecommender, self).__init__(conf, training_set, test_set, dev_data, **kwargs)
self.data = Interaction(conf, training_set, test_set, dev_data) # self.data源头
self.bestPerformance = []
top = self.ranking['-topN'].split(',')
self.topN = [int(num) for num in top]
Expand Down Expand Up @@ -45,12 +46,12 @@ def process_bar(num, total):
rec_list = {}
user_count = len(self.data.test_set)
for i, user in enumerate(self.data.test_set):
candidates = self.predict(user)
item_candidates = self.predict(user) # 预测每个用户可能购买的候选item
# predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
rated_list, li = self.data.user_rated(user)
for item in rated_list:
candidates[self.data.item[item]] = -10e8
ids, scores = find_k_largest(self.max_N, candidates)
item_candidates[self.data.item[item]] = -10e8
ids, scores = find_k_largest(self.max_N, item_candidates)
item_names = [self.data.id2item[iid] for iid in ids]
rec_list[user] = list(zip(item_names, scores))
if i % 1000 == 0:
Expand All @@ -59,6 +60,7 @@ def process_bar(num, total):
print('')
return rec_list


def evaluate(self, rec_list):
self.recOutput.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is hit.\n')
for user in self.data.test_set:
Expand All @@ -76,7 +78,8 @@ def evaluate(self, rec_list):
FileIO.write_file(out_dir, file_name, self.recOutput)
print('The result has been output to ', abspath(out_dir), '.')
file_name = self.config['model.name'] + '@' + current_time + '-performance' + '.txt'
self.result = ranking_evaluation(self.data.test_set, rec_list, self.topN)
with tf.name_scope("result"):
self.result = ranking_evaluation(self.data.test_set, rec_list, self.topN)
self.model_log.add('###Evaluation Results###')
self.model_log.add(self.result)
FileIO.write_file(out_dir, file_name, self.result)
Expand All @@ -93,11 +96,9 @@ def fast_evaluation(self, epoch):
k, v = m.strip().split(':')
performance[k] = float(v)
for k in self.bestPerformance[1]:
if self.bestPerformance[1][k] > performance[k]:
if self.bestPerformance[1][k] < performance[k]:
count += 1
else:
count -= 1
if count < 0:
if count >= 2:
self.bestPerformance[1] = performance
self.bestPerformance[0] = epoch + 1
self.save()
Expand Down Expand Up @@ -126,3 +127,126 @@ def fast_evaluation(self, epoch):
print('Epoch:', str(self.bestPerformance[0]) + ',', bp)
print('-' * 120)
return measure

# Predict, for each item in the test set, the candidate users likely to purchase it
def test_by_item(self):
    """Rank candidate users for every item in ``self.data.test_set_i``.

    For each item the scores come from ``self.predict_by_item(item)``; the
    users returned by ``self.data.item_rated(item)`` are pushed to a very
    low score so they cannot be recommended again, and the ``self.max_N``
    best remaining users are kept.

    Returns:
        dict mapping each item to a list of ``(user name, score)`` pairs.
    """
    def process_bar(num, total):
        # An empty item set means there is nothing left to do: show the bar
        # as complete instead of dividing by zero.
        rate = float(num) / total if total else 1.0
        ratenum = int(50 * rate)
        r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum * 2)
        sys.stdout.write(r)
        sys.stdout.flush()

    rec_list = {}
    item_count = len(self.data.test_set_i)
    for i, item in enumerate(self.data.test_set_i):
        user_candidates = self.predict_by_item(item)
        # Only the first element of item_rated()'s result is needed here.
        rated_list, _ = self.data.item_rated(item)
        for user in rated_list:
            user_candidates[self.data.user[user]] = -10e8
        ids, scores = find_k_largest(self.max_N, user_candidates)
        user_names = [self.data.id2user[uid] for uid in ids]
        rec_list[item] = list(zip(user_names, scores))
        # Refresh the progress bar only every 1000 items.
        if i % 1000 == 0:
            process_bar(i, item_count)
    process_bar(item_count, item_count)
    print('')
    return rec_list

# Predict, for each item in the dev (validation) set, the candidate users likely to purchase it
def dev_by_item(self):
    """Rank candidate users for every item in ``self.data.dev_set_i``.

    For each item the scores come from ``self.predict_by_item(item)``; the
    users returned by ``self.data.item_rated(item)`` are pushed to a very
    low score so they cannot be recommended again, and the ``self.max_N``
    best remaining users are kept.

    Returns:
        dict mapping each item to a list of ``(user name, score)`` pairs.
    """
    def process_bar(num, total):
        # An empty item set means there is nothing left to do: show the bar
        # as complete instead of dividing by zero.
        rate = float(num) / total if total else 1.0
        ratenum = int(50 * rate)
        r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum * 2)
        sys.stdout.write(r)
        sys.stdout.flush()

    rec_list = {}
    item_count = len(self.data.dev_set_i)
    for i, item in enumerate(self.data.dev_set_i):
        user_candidates = self.predict_by_item(item)
        # Only the first element of item_rated()'s result is needed here.
        rated_list, _ = self.data.item_rated(item)
        for user in rated_list:
            user_candidates[self.data.user[user]] = -10e8
        ids, scores = find_k_largest(self.max_N, user_candidates)
        user_names = [self.data.id2user[uid] for uid in ids]
        rec_list[item] = list(zip(user_names, scores))
        # Refresh the progress bar only every 1000 items.
        if i % 1000 == 0:
            process_bar(i, item_count)
    process_bar(item_count, item_count)
    print('')
    return rec_list

def evaluate_by_item(self, rec_list):
    """Write the per-item user recommendations and their ranking metrics.

    Two files are written to the configured output directory: the
    recommendation list for every item in ``self.data.test_set_i`` (a ``*``
    marks a user that appears in that item's test entry) and the result of
    ``ranking_evaluation`` over ``self.topN``.

    Args:
        rec_list: mapping of item to a sequence of ``(user, score)`` pairs.
    """
    self.recOutput.append('itemId: recommendations in (userId, ranking score) pairs, * means the user is hit.\n')
    ground_truth = self.data.test_set_i
    for item in ground_truth:
        hit_users = ground_truth[item]
        rendered = [
            ' (' + rec[0] + ',' + str(rec[1]) + ')' + ('*' if rec[0] in hit_users else '')
            for rec in rec_list[item]
        ]
        self.recOutput.append(item + ':' + ''.join(rendered) + '\n')

    # Both output files share the same "<model>@<timestamp>" prefix.
    current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    out_dir = self.output['-dir']

    rec_file = self.config['model.name'] + '@' + current_time + '-top-' + str(self.max_N) + 'items' + '.txt'
    FileIO.write_file(out_dir, rec_file, self.recOutput)
    print('The result has been output to ', abspath(out_dir), '.')

    perf_file = self.config['model.name'] + '@' + current_time + '-performance' + '.txt'
    with tf.name_scope("result"):
        self.result = ranking_evaluation(ground_truth, rec_list, self.topN)
    self.model_log.add('###Evaluation Results###')
    self.model_log.add(self.result)
    FileIO.write_file(out_dir, perf_file, self.result)
    print('The result of %s:\n%s' % (self.model_name, ''.join(self.result)))

def fast_evaluation_by_item(self, epoch):
    """Evaluate on the dev item set and track the best epoch so far.

    ``self.bestPerformance`` holds ``[best_epoch, metrics]``. On the first
    call it is initialised from the current metrics; afterwards it is
    replaced when at least two of the stored metrics are strictly improved.
    ``self.save()`` is called whenever the record is set or replaced.

    Args:
        epoch: zero-based epoch index; it is reported and stored as
            ``epoch + 1``.

    Returns:
        The stripped metric strings of this evaluation (header entry dropped).
    """
    print('Evaluating the model...')
    rec_list = self.dev_by_item()
    measure = ranking_evaluation(self.data.dev_set_i, rec_list, [self.max_N])

    def parse_metrics():
        # Each entry after the header looks like "<name>:<value>".
        return {name: float(value)
                for name, value in (entry.strip().split(':') for entry in measure[1:])}

    if not self.bestPerformance:
        # First evaluation: the current epoch is the best one by definition.
        self.bestPerformance.append(epoch + 1)
        self.bestPerformance.append(parse_metrics())
        self.save()
    else:
        performance = parse_metrics()
        best_so_far = self.bestPerformance[1]
        # Count how many of the stored metrics were strictly improved.
        improved = sum(1 for name in best_so_far if best_so_far[name] < performance[name])
        if improved >= 2:
            self.bestPerformance[1] = performance
            self.bestPerformance[0] = epoch + 1
            self.save()

    print('-' * 120)
    print('Real-Time Ranking Performance ' + ' (Top-' + str(self.max_N) + ' User Recommendation)')
    measure = [entry.strip() for entry in measure[1:]]
    print('*Current Performance*')
    print('Epoch:', str(epoch + 1) + ',', ' | '.join(measure))
    # F1 is deliberately left out of the best-performance summary.
    best = self.bestPerformance[1]
    bp = ' | '.join(label + ':' + str(best[label])
                    for label in ('Hit Ratio', 'Precision', 'Recall', 'NDCG'))
    print('*Best Performance* ')
    print('Epoch:', str(self.bestPerformance[0]) + ',', bp)
    print('-' * 120)
    return measure
18 changes: 14 additions & 4 deletions base/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@


class Recommender(object):
def __init__(self, conf, training_set, test_set, **kwargs):
def __init__(self, conf, training_set, test_set, dev_data, **kwargs):
self.config = conf
self.data = Data(self.config, training_set, test_set)
self.data = Data(self.config, training_set, test_set, dev_data)
self.model_name = self.config['model.name']
self.ranking = OptionConf(self.config['item.ranking'])
self.emb_size = int(self.config['embedding.size'])
Expand Down Expand Up @@ -52,9 +52,15 @@ def train(self):
def predict(self, u):
    """Placeholder for scoring candidates for ``u``; does nothing here."""
    pass

def predict_by_item(self, u):
    """Placeholder for the item-side prediction; does nothing here.

    NOTE(review): callers in this file pass an item as ``u`` — confirm the
    parameter name is intentional.
    """
    pass

def test(self):
    """Placeholder for the user-side test pass; does nothing here."""
    pass

def test_by_item(self):
    """Placeholder for the item-side test pass; does nothing here."""
    pass

def save(self):
    """Placeholder called when a new best result is recorded; does nothing here.

    Presumably meant to persist model state in subclasses — TODO confirm.
    """
    pass

Expand All @@ -64,6 +70,9 @@ def load(self):
def evaluate(self, rec_list):
    """Placeholder for evaluating ``rec_list``; does nothing here."""
    pass

def evaluate_by_item(self, rec_list):
    """Placeholder for evaluating an item-keyed ``rec_list``; does nothing here."""
    pass

def execute(self):
self.initializing_log()
self.print_model_info()
Expand All @@ -72,6 +81,7 @@ def execute(self):
print('Training Model...')
self.train()
print('Testing...')
rec_list = self.test()
# rec_list = self.test() # 预测每个用户可能购买的候选item
rec_list = self.test_by_item() # 预测每个item可能购买的候选用户
print('Evaluating...')
self.evaluate(rec_list)
self.evaluate_by_item(rec_list)
1 change: 1 addition & 0 deletions base/tf_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def __init__(self):
def convert_sparse_mat_to_tensor(adj):
    """Build a ``tf.SparseTensor`` from the non-zero entries of ``adj``.

    Args:
        adj: matrix exposing ``nonzero()``, ``data`` and ``shape``
            (presumably a SciPy sparse matrix — TODO confirm).

    Returns:
        A ``tf.SparseTensor`` with the same dense shape as ``adj``.
    """
    # NOTE(review): ``adj.data`` is paired with the coordinates positionally;
    # this assumes ``data`` holds no explicitly stored zeros — confirm.
    rows, cols = adj.nonzero()
    coordinates = np.array(list(zip(rows, cols)))
    # tf.SparseTensor is TensorFlow's sparse container (PyTorch's counterpart
    # is sparse_coo_tensor).
    return tf.SparseTensor(indices=coordinates, values=adj.data, dense_shape=adj.shape)

Expand Down
15 changes: 8 additions & 7 deletions conf/MHCN.conf
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
training.set=./dataset/douban-book/train.txt
test.set=./dataset/douban-book/test.txt
social.data=./dataset/douban-book/trust.txt
training.set=./dataset/retweet_prediction/train.txt
test.set=./dataset/retweet_prediction/test.txt
dev.set=./dataset/retweet_prediction/dev.txt
social.data=./dataset/retweet_prediction/trust.txt
model.name=MHCN
model.type=graph
item.ranking=-topN 10,20
item.ranking=-topN 100
embedding.size=64
num.max.epoch=30
batch_size=4096
learnRate=0.001
num.max.epoch=10
batch_size=256
learnRate=0.00001
reg.lambda=0.0001
MHCN=-n_layer 2 -ss_rate 0.01
output.setup=-dir ./results/
Binary file added data/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added data/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/data.cpython-36.pyc
Binary file not shown.
Binary file added data/__pycache__/data.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/graph.cpython-36.pyc
Binary file not shown.
Binary file added data/__pycache__/graph.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/loader.cpython-36.pyc
Binary file not shown.
Binary file added data/__pycache__/loader.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/retweet_title_idf.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/social.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/ui_graph.cpython-36.pyc
Binary file not shown.
Binary file added data/__pycache__/ui_graph.cpython-37.pyc
Binary file not shown.
3 changes: 2 additions & 1 deletion data/data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
class Data(object):
    """Plain container for the configuration and the three data splits."""

    def __init__(self, conf, training, test, dev):
        """Store the arguments unchanged on the instance.

        Args:
            conf: configuration object.
            training: training split.
            test: test split; can also be a validation set if the input is
                for validation.
            dev: dev split.
        """
        self.config, self.training_data = conf, training
        self.test_data, self.dev_data = test, dev



Expand Down
19 changes: 15 additions & 4 deletions data/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ def delete_file(file_path):
def load_data_set(file, rec_type):
if rec_type == 'graph':
data = []
with open(file) as f:
with open(file, encoding='utf-8', errors='ignore') as f:
for line in f:
items = split(' ', line.strip())
user_id = items[0]
item_id = items[1]
weight = items[2]
data.append([user_id, item_id, float(weight)])
title = items[2]
weight = items[3]
data.append([user_id, item_id, title, float(weight)])

if rec_type == 'sequential':
data = {}
Expand Down Expand Up @@ -59,8 +60,18 @@ def load_social_data(file):
user1 = items[0]
user2 = items[1]
if len(items) < 3:
weight = 1
weight = 1 # 权重统一设置为1
else:
weight = float(items[2])
social_data.append([user1, user2, weight])
return social_data

@staticmethod
def load_titles_data(file):
    """Read the first space-separated token of every line in ``file``.

    Args:
        file: path of a UTF-8 text file; undecodable bytes are ignored.

    Returns:
        list with one entry per line, in file order (empty for an empty
        file).
    """
    titles = []
    print('loading titles data...')
    with open(file, encoding='utf-8', errors='ignore') as f:
        for line in f:
            items = line.strip().split(' ')
            # Accumulate instead of rebinding ``titles``: the previous code
            # kept only the last line's token and returned a str.
            titles.append(items[0])
    return titles
Loading

0 comments on commit 18f6c23

Please sign in to comment.