Skip to content

Commit

Permalink
report 1: normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
orbxball committed Jun 1, 2017
1 parent db99898 commit 0b54e9b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 14 deletions.
10 changes: 5 additions & 5 deletions hw6/Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ def build_cf_model(n_users, n_movies, dim):
m = Embedding(n_movies, dim)(m_input)
m = Reshape((dim,))(m)

u_bias = Embedding(n_users, 1)(u_input)
u_bias = Reshape((1,))(u_bias)
m_bias = Embedding(n_movies, 1)(m_input)
m_bias = Reshape((1,))(m_bias)
# u_bias = Embedding(n_users, 1)(u_input)
# u_bias = Reshape((1,))(u_bias)
# m_bias = Embedding(n_movies, 1)(m_input)
# m_bias = Reshape((1,))(m_bias)

out = dot([u, m], -1)
out = add([out, u_bias, m_bias])
# out = add([out, u_bias, m_bias])

model = Model(inputs=[u_input, m_input], outputs=out)
return model
Expand Down
4 changes: 3 additions & 1 deletion hw6/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def main(args):
print('Loading model done!!!')

recommendations = pd.read_csv(TEST_CSV, usecols=['TestDataID'])
recommendations['Rating'] = test_data.apply(lambda x: predict_rating(trained_model, x['UserID'], x['MovieID']), axis=1)
recommendations['Rating'] = test_data.apply(lambda x: predict_rating(trained_model, x['UserID'], x['MovieID']) * std + mean, axis=1)
# print(recommendations)

ensure_dir(args.output)
Expand Down Expand Up @@ -70,5 +70,7 @@ def main(args):
DIM = list(info['dim'])[0]
max_userid = list(info['max_userid'])[0]
max_movieid = list(info['max_movieid'])[0]
mean = list(info['mean'])[0]
std = list(info['std'])[0]

main(args)
21 changes: 13 additions & 8 deletions hw6/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def parse_args():


def rmse(y_true, y_pred):
y_pred = K.clip(y_pred, 1., 5.)
# y_pred = K.clip(y_pred, 1., 5.)
return K.sqrt(K.mean(K.square((y_true - y_pred))))

def main(args):
Expand All @@ -29,20 +29,25 @@ def main(args):
ratings['Movie_emb_id'] = ratings['MovieID'] - 1
print('{} ratings loaded.'.format(ratings.shape[0]))

maximum = {}
maximum['max_userid'] = [max_userid]
maximum['max_movieid'] = [max_movieid]
maximum['dim'] = [DIM]
pd.DataFrame(data=maximum).to_csv(MAX_FILE, index=False)
print('max info save to {}'.format(MAX_FILE))

ratings = ratings.sample(frac=1)
Users = ratings['User_emb_id'].values
print('Users: {}, shape = {}'.format(Users, Users.shape))
Movies = ratings['Movie_emb_id'].values
print('Movies: {}, shape = {}'.format(Movies, Movies.shape))
Ratings = ratings['Rating'].values
print('Ratings: {}, shape = {}'.format(Ratings, Ratings.shape))
mean = Ratings.mean()
std = Ratings.std()
Ratings = (Ratings - mean) / (std + 1e-100)

maximum = {}
maximum['max_userid'] = [max_userid]
maximum['max_movieid'] = [max_movieid]
maximum['dim'] = [DIM]
maximum['mean'] = [mean]
maximum['std'] = [std]
pd.DataFrame(data=maximum).to_csv(MAX_FILE, index=False)
print('max info save to {}'.format(MAX_FILE))

model = build_cf_model(max_userid, max_movieid, DIM)
model.compile(loss='mse', optimizer='adamax', metrics=[rmse])
Expand Down

0 comments on commit 0b54e9b

Please sign in to comment.