This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import cv2 | |
import numpy as np | |
def normalize_kernel(kernel, k_width, k_height, scaling_factor = 1.0): | |
'''Zero-summing normalize kernel''' | |
K_EPS = 1.0e-12 | |
# positive and negative sum of kernel values | |
pos_range, neg_range = 0, 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(x, temp):
    '''Sample one index from temperature-scaled scores.

    Args:
        x: Tensor of unnormalized scores; softmax is taken over dim 0.
        temp: Value the scores are divided by before the softmax.

    Returns:
        The sampled index (a NumPy integer), drawn with the softmax
        probabilities as weights.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when the
            probabilities are not a valid 1-D distribution (for example
            NaN values produced by ``temp == 0``).
    '''
    probs = F.softmax(x / temp, dim=0)
    probs = np.squeeze(probs.detach().cpu().numpy()).astype(np.float64)
    # Renormalize in double precision so rounding in the softmax output
    # cannot trip the "probabilities do not sum to 1" check.
    probs = probs / probs.sum()
    # The population size comes from the vector itself, so the function no
    # longer depends on a module-level vocabulary length staying in sync.
    ind = np.random.choice(probs.size, 1, p=probs)
    return ind[0]
# Seed words that the generated text starts from.
generated_text = ['there', 'is', 'no', 'one', 'love']
curr_len, embeds = 0, []
# Integer id of the ';' token (presumably the end-of-quote marker; confirm
# against the code that builds the training targets).
is_end = word_to_int[';']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_batches_x(tot_seq, batch_size):
    '''Yield (X[ids], Y[ids]) mini-batches in a freshly shuffled order.

    A random permutation of ``range(tot_seq)`` is drawn on every call and
    sliced into chunks of ``batch_size`` (the last chunk may be shorter).
    ``X`` and ``Y`` are read from module scope and indexed with a list of
    integer positions.
    '''
    shuffled = np.random.permutation(tot_seq).tolist()
    for start in range(0, tot_seq, batch_size):
        chosen = shuffled[start:start + batch_size]
        yield X[chosen], Y[chosen]
class Quote_Generator(nn.Module): | |
def __init__(self, embed_size, hidden_size, vocab_len): | |
super(Quote_Generator, self).__init__() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
window = 5  # max_seq_length
sequences, next_words = [], []
# Slide a fixed-size window over every quote: each window is an input
# sequence and the word right after it is the target. The window that
# ends on the last word gets ';' as its target.
for quote in quotes:
    words = quote.split(' ')
    for i in range(len(words) - window + 1):
        sequences.append(words[i:i + window])
        next_words.append(words[i + window] if i + window < len(words) else ';')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Project the first n_points embedding rows to 2-D with t-SNE (cosine
# metric) and draw one scatter point per row.
n_points = 100
tsne = TSNE(
    n_components=2,
    perplexity=40,
    n_iter=300,
    metric='cosine',
)
emb_tsne = tsne.fit_transform(emb[:n_points, :])
# Keys of word_to_int, in dict order, for the same number of points.
labels = list(word_to_int.keys())[:n_points]
x, y = emb_tsne[:, 0], emb_tsne[:, 1]
plt.figure(figsize=(16, 16))
for i in range(n_points):
    plt.scatter(x[i], y[i])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def f_x(x, x_max, alpha):
    '''Return ``min((x / x_max) ** alpha, 1)`` element-wise on ``device``.

    ``device`` is read from module scope.
    '''
    scaled = (x / x_max) ** alpha
    ceiling = torch.ones_like(scaled)
    return torch.min(scaled, ceiling).to(device)
def weight_mse(w_x, x, log_x):
    '''Return the mean of ``w_x`` times the element-wise squared error.

    The squared error between ``x`` and ``log_x`` is computed without
    reduction, scaled by ``w_x``, averaged over all elements, and the
    result is moved to the module-level ``device``.
    '''
    squared_error = F.mse_loss(x, log_x, reduction='none')
    weighted = w_x * squared_error
    return torch.mean(weighted).to(device)
def glove_train(glove): | |
epochs = 100 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_batch(batch_size):
    '''Yield shuffled (p1, p2, occs) mini-batches.

    A random permutation of ``occs.size`` positions is drawn on every call
    and consumed in chunks of ``batch_size`` up to ``tot_pairs``. ``p1``,
    ``p2``, ``occs`` and ``tot_pairs`` are read from module scope.
    '''
    order = np.random.permutation(occs.size).tolist()
    for start in range(0, tot_pairs, batch_size):
        picked = order[start:start + batch_size]
        yield p1[picked], p2[picked], occs[picked]
# Select the first CUDA device when one is available. In the lines visible
# here, device stays None otherwise.
# NOTE(review): the snippet may continue past this point (e.g. a CPU
# fallback) - confirm against the full file.
device = None
if torch.cuda.is_available():
    device = torch.device("cuda:0")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flatten the co-occurrence mapping into three parallel arrays: the first
# and second element of each key, and the value stored under that key.
tot_pairs = len(co_occ_matrix)
p1, p2, occs = [], [], []
for i in co_occ_matrix:
    p1.append(i[0])
    p2.append(i[1])
    occs.append(co_occ_matrix[i])
p1, p2, occs = np.array(p1), np.array(p2), np.array(occs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
co_occ_matrix = defaultdict(int) | |
window = 5 # not greater than 5 | |
for sent in quotes: | |
words = sent.split(' ') | |
# first window | |
for i in range(0, window): | |
for j in range(i+1, window): | |
weight = 1/(j-i) | |
ind_1 = word_to_int[words[i]] | |
ind_2 = word_to_int[words[j]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
words_dict = {}


def unique_words(data_list):
    '''Record every space-separated token of ``data_list`` in ``words_dict``.

    Each string is split on single spaces and every resulting token is
    stored as a key with value 1 in the module-level ``words_dict``, which
    keeps accumulating across calls. Returns the live keys view of that
    dict.
    '''
    for sentence in data_list:
        words_dict.update(dict.fromkeys(sentence.split(' '), 1))
    return words_dict.keys()
# Sorted list of every distinct token found in the quotes.
words = sorted(unique_words(quotes))
NewerOlder