Skip to content

Commit

Permalink
rhash
Browse files Browse the repository at this point in the history
  • Loading branch information
X-XG committed Dec 19, 2021
1 parent 9ce35e8 commit e8796c3
Show file tree
Hide file tree
Showing 10 changed files with 33,429 additions and 1 deletion.
2 changes: 2 additions & 0 deletions exp2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
exp2/
|----data/
...
|----rhash/
...
|----openke_transE/
|----base/
|----config/
Expand Down
Binary file modified exp2/report.pdf
Binary file not shown.
120 changes: 120 additions & 0 deletions exp2/rhash/ent_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Pair every entity of the FB15k-237 files with an id of the data/ files by
# comparing how often each one occurs as head/tail in the train, dev and test
# splits.  Only unambiguous pairs (exactly one id shares the entity's
# five-number signature) are written to OUTPUT_PATH.

# Each split is (path, slot counting head occurrences, slot counting tail
# occurrences).  The test splits only count heads, hence the None tail slot.
ORIGIN_SPLITS = (
    ('FB15k-237/train.tsv', 0, 1),
    ('FB15k-237/dev.tsv', 2, 3),
    ('FB15k-237/test.tsv', 4, None),
)
TA_SPLITS = (
    ('data/train.txt', 0, 1),
    ('data/dev.txt', 2, 3),
    ('data/test.txt', 4, None),
)
OUTPUT_PATH = './refresh/ent_map.txt'
SLOT_COUNT = 5


def build_frequency(splits):
    """Return {entity: [train_head, train_tail, dev_head, dev_tail, test_head]}.

    splits -- iterable of (path, head_slot, tail_slot); tail_slot may be None
              when the tail column of that file must not be counted.

    Lines are split on whitespace; field 0 is the head and field 2 the tail.
    Blank lines are skipped.  Every file is closed as soon as it is consumed.
    """
    freq = {}
    for path, head_slot, tail_slot in splits:
        with open(path, 'r') as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue
                # setdefault creates the zeroed signature on first sight.
                freq.setdefault(fields[0], [0] * SLOT_COUNT)[head_slot] += 1
                if tail_slot is not None:
                    freq.setdefault(fields[2], [0] * SLOT_COUNT)[tail_slot] += 1
    return freq


def match_by_signature(freq_origin, freq_ta):
    """Map each origin entity to the single id with an identical signature.

    An entity whose signature is shared by no id, or by more than one id, is
    left out of the result.  The result keeps the iteration order of
    freq_origin.
    """
    # Group ids by signature once instead of rescanning freq_ta per entity.
    ids_by_signature = {}
    for ta_id, counts in freq_ta.items():
        ids_by_signature.setdefault(tuple(counts), []).append(ta_id)

    matched = {}
    for word, counts in freq_origin.items():
        candidates = ids_by_signature.get(tuple(counts))
        if candidates is not None and len(candidates) == 1:
            matched[word] = candidates[0]
    return matched


def write_mapping(path, mapping):
    """Write mapping to path as one 'key<TAB>value' line per entry."""
    with open(path, 'w') as out:
        out.writelines(
            '{}\t{}\n'.format(key, value) for key, value in mapping.items()
        )


def main():
    """Build both frequency tables, match them, save and report the result."""
    freq_origin = build_frequency(ORIGIN_SPLITS)
    freq_ta = build_frequency(TA_SPLITS)
    ent_map = match_by_signature(freq_origin, freq_ta)
    write_mapping(OUTPUT_PATH, ent_map)
    # Number of entities that received an unambiguous id.
    print(len(ent_map))


if __name__ == '__main__':
    main()
81 changes: 81 additions & 0 deletions exp2/rhash/ent_map_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Grow an existing entity alignment using the training triples: whenever one
# end of an FB15k-237 triple is already aligned and exactly one data/ triple
# has the aligned entity in the same position with the mapped relation, the
# other end of that data/ triple becomes the alignment of the unknown entity.
# The enlarged alignment is written back over ENT_MAP_PATH.
#
# NOTE: a disabled experiment used to seed the alignment beforehand by pairing
# the heads of the first 20466 lines of data/test.txt and FB15k-237/test.tsv
# line by line.

REL_MAP_PATH = 'refresh/rel_map.txt'
ENT_MAP_PATH = './refresh/ent_certain_map_3.txt'  # read, then rewritten
ORIGIN_TRAIN_PATH = 'FB15k-237/train.tsv'
TA_TRAIN_PATH = 'data/train.txt'
PROGRESS_EVERY = 100


def load_pairs(path):
    """Read a whitespace-separated two-column file into a {first: second} dict.

    Blank lines are skipped; a later line overrides an earlier one with the
    same first column.
    """
    pairs = {}
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.split()
            if fields:
                pairs[fields[0]] = fields[1]
    return pairs


def read_triples(path):
    """Return the non-blank lines of path, each split on whitespace."""
    with open(path, 'r') as handle:
        return [fields for fields in map(str.split, handle) if fields]


def index_neighbours(triples, key_pos, value_pos):
    """Index triples by (entity at key_pos, relation).

    Returns {(entity, relation): [first value seen at value_pos, match count]}.
    The count covers every matching triple, duplicates included, so that a
    count of 1 means the key identifies exactly one line.
    """
    index = {}
    for triple in triples:
        key = (triple[key_pos], triple[1])
        entry = index.get(key)
        if entry is None:
            index[key] = [triple[value_pos], 1]
        else:
            entry[1] += 1
    return index


def propagate(origin_triples, ta_triples, rel_map, ent_map,
              progress_every=PROGRESS_EVERY):
    """Extend ent_map in place from triples with exactly one aligned end.

    origin_triples -- [head, relation, tail] lists using the origin names
    ta_triples     -- [head, relation, tail] lists using the data/ ids
    rel_map        -- origin relation -> data/ relation
    ent_map        -- origin entity -> data/ id; updated and returned
    progress_every -- print the running triple count at this interval
                      (0 or None disables the output)

    Alignments learned from earlier triples are used for later ones.
    Prints 'error' and raises SystemExit(-1) on a relation missing from
    rel_map.
    """
    # Built once; replaces a full rescan of ta_triples per origin triple.
    by_head = index_neighbours(ta_triples, 0, 2)
    by_tail = index_neighbours(ta_triples, 2, 0)

    for count, triple in enumerate(origin_triples, 1):
        if progress_every and count % progress_every == 0:
            print(count)
        head, rel, tail = triple[0], triple[1], triple[2]
        if rel not in rel_map:
            print('error')
            raise SystemExit(-1)

        head_known = head in ent_map
        tail_known = tail in ent_map
        if head_known == tail_known:
            # Both aligned: nothing to learn.  Neither aligned: no anchor.
            continue

        if head_known:
            anchor, index, unknown = ent_map[head], by_head, tail
        else:
            anchor, index, unknown = ent_map[tail], by_tail, head

        hit = index.get((anchor, rel_map[rel]))
        # Accept only an unambiguous match; several candidates prove nothing.
        if hit is not None and hit[1] == 1:
            ent_map[unknown] = hit[0]
    return ent_map


def write_pairs(path, pairs):
    """Write pairs to path as one 'key<TAB>value' line per entry."""
    with open(path, 'w') as out:
        out.writelines(
            '{}\t{}\n'.format(key, value) for key, value in pairs.items()
        )


def main():
    """Load the maps and training triples, propagate, and save the result."""
    rel_map = load_pairs(REL_MAP_PATH)
    ent_map = load_pairs(ENT_MAP_PATH)
    origin_triples = read_triples(ORIGIN_TRAIN_PATH)
    ta_triples = read_triples(TA_TRAIN_PATH)
    propagate(origin_triples, ta_triples, rel_map, ent_map)
    write_pairs(ENT_MAP_PATH, ent_map)


if __name__ == '__main__':
    main()
Loading

0 comments on commit e8796c3

Please sign in to comment.