-
Notifications
You must be signed in to change notification settings - Fork 15
/
cui2vec-converter.py
30 lines (22 loc) · 908 Bytes
/
cui2vec-converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
import numpy as np
import re
# Export the pretrained cui2vec table as whitespace-separated text: one row per
# line, the row label first, followed by that row's values.
cui_vecs_df = pd.read_csv('data/cui2vec_pretrained.csv')
# 'Unnamed: 0' is the label pandas gives a CSV column with an empty header;
# making it the index lets to_string() emit it as the leading token of a row.
cui_vecs_df.set_index('Unnamed: 0', inplace=True)
# NOTE(review): to_string() renders floats using pandas' display precision, so
# the written values may be rounded relative to the CSV -- pass float_format
# if full precision matters downstream.
x = cui_vecs_df.to_string(header=False, index=True, index_names=False)
# Mode 'w' rather than 'a': re-running the script must replace the export.
# Appending would duplicate every row, and since to_string() does not end with
# a newline, the last old row would be fused with the first new one.
with open('data/cui2vec_pretrained.txt', 'w') as tfile:
    tfile.write(x)
# Read vectors stored in GloVe-style text format (a token followed by its
# numeric components on each line); the vector length is not fixed.
def read_glove_vecs(file):
    """Load whitespace-separated embedding vectors from a text file.

    Each non-blank line is split on whitespace: the first field is the token
    and the remaining fields are parsed as its float64 vector components.

    Args:
        file: Path of the text file to read.

    Returns:
        A ``(words, word_to_vec_map)`` tuple: the set of tokens seen, and a
        dict mapping each token to a 1-D ``numpy`` array of ``float64``. If a
        token appears on several lines, the last occurrence wins.

    Raises:
        ValueError: If a component cannot be converted to a float.
    """
    words = set()
    word_to_vec_map = {}
    with open(file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line: there is no token to index,
                # so skip it instead of failing on fields[0].
                continue
            word = fields[0]
            words.add(word)
            word_to_vec_map[word] = np.array(fields[1:], dtype=np.float64)
    return words, word_to_vec_map
# Read the exported text file back in, which checks that every row parses as a
# token followed by float components.
words, word_to_vec_map = read_glove_vecs('data/cui2vec_pretrained.txt')
# The suggested converter command points at data/, where this script wrote the
# text file, so it can be run from the same working directory as this script.
print("now convert to magnitude python -m pymagnitude.converter -i data/cui2vec_pretrained.txt -o data/cui2vec_pretrained.magnitude")