Skip to content

Commit

Permalink
add bleu score per epochs codes
Browse files Browse the repository at this point in the history
  • Loading branch information
hyunwoongko committed Dec 21, 2019
1 parent f932323 commit ded5bcf
Show file tree
Hide file tree
Showing 12 changed files with 152 additions and 90 deletions.
21 changes: 12 additions & 9 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 12 additions & 8 deletions graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,25 @@ def read(name):
return [float(i) for idx, i in enumerate(file.split(','))]


def draw(mode='loss'):
    """Plot one of the per-epoch training records and show the figure.

    Args:
        mode: Which record to plot. ``'loss'`` draws the train and
            validation loss curves read from ``./result/train_loss.txt`` and
            ``./result/test_loss.txt``; ``'bleu'`` draws the BLEU curve read
            from ``./result/bleu.txt``. Defaults to ``'loss'`` so the older
            zero-argument call ``draw()`` keeps working.

    Raises:
        ValueError: If ``mode`` is neither ``'loss'`` nor ``'bleu'``.
    """
    if mode == 'loss':
        train = read('./result/train_loss.txt')
        test = read('./result/test_loss.txt')
        plt.plot(train, 'r', label='train')
        plt.plot(test, 'b', label='validation')
    elif mode == 'bleu':
        bleu = read('./result/bleu.txt')
        plt.plot(bleu, 'b', label='bleu score')
    else:
        # Fail loudly instead of showing an empty, unlabeled figure.
        raise ValueError(
            "mode must be 'loss' or 'bleu', got {!r}".format(mode))

    plt.xlabel('epoch')
    # The y axis is labelled with the plotted metric's name.
    plt.ylabel(mode)
    plt.title('training result')
    plt.grid(True, which='both', axis='both')
    plt.legend(loc='lower left')

    plt.show()


if __name__ == '__main__':
    # Show one figure per recorded metric: loss curves first, then BLEU.
    draw(mode='loss')
    draw(mode='bleu')
1 change: 1 addition & 0 deletions result/bleu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[0.2501438161546564, 0.13948406463256255]
1 change: 0 additions & 1 deletion result/test.txt

This file was deleted.

1 change: 1 addition & 0 deletions result/test_loss.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[8.388485550880432, 7.465105414390564]
1 change: 0 additions & 1 deletion result/train.txt

This file was deleted.

1 change: 1 addition & 0 deletions result/train_loss.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[9.24096984275112, 8.002939659068238]
47 changes: 35 additions & 12 deletions retrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

from data import *
from models.model.transformer import Transformer
from util.bleu import idx_to_word, get_bleu
from util.epoch_timer import epoch_time


def load_loss_record(path):
def load_record(path):
f = open(path, 'r')
losses = f.read()
losses = re.sub('\\]', '', losses)
Expand All @@ -29,8 +30,9 @@ def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)


# Resume from the records written by earlier runs. `load_record` is used here
# as returning a (values, count) pair; only the train-loss count is kept.
train_losses, train_count = load_record('./result/train_loss.txt')
test_losses, _ = load_record('./result/test_loss.txt')
bleus, _ = load_record('./result/bleu.txt')
# Subtract the recorded epoch count from the configured total.
# NOTE(review): `epoch` is not defined in the visible code — presumably it
# comes from `from data import *`; confirm.
epoch -= train_count

model = Transformer(src_pad_idx=src_pad_idx,
Expand Down Expand Up @@ -88,50 +90,71 @@ def train(model, iterator, optimizer, criterion, clip):
def evaluate(model, iterator, criterion):
    """Evaluate ``model`` on ``iterator`` without gradient tracking.

    For every batch the loss is computed on the flattened predictions, and a
    BLEU score is computed per sentence from the highest-scoring token at
    each position. Sentence scores are averaged per batch, and batch scores
    are averaged over the iterator.

    Relies on the module-level names ``batch_size``, ``loader``,
    ``idx_to_word`` and ``get_bleu``.

    Args:
        model: Called as ``model(src, trg[:, :-1])``.
        iterator: Iterable of batches exposing ``.src`` and ``.trg``; must
            support ``len()``.
        criterion: Loss callable taking ``(flattened_output, flattened_trg)``.

    Returns:
        A ``(mean_loss, mean_bleu)`` tuple. ``mean_bleu`` is ``0.0`` when no
        sentence could be scored.
    """
    model.eval()
    epoch_loss = 0
    batch_bleus = []
    with torch.no_grad():
        for batch in iterator:
            src = batch.src
            trg = batch.trg
            # assumes output is (batch, seq_len, vocab) — TODO confirm
            output = model(src, trg[:, :-1])
            output_reshape = output.contiguous().view(-1, output.shape[-1])
            trg = trg[:, 1:].contiguous().view(-1)

            loss = criterion(output_reshape, trg)
            epoch_loss += loss.item()

            # The last batch may hold fewer than `batch_size` sentences, so
            # bound the loop by the real batch dimension instead of relying
            # on an IndexError being swallowed.
            sentence_bleus = []
            for j in range(min(batch_size, output.shape[0])):
                try:
                    trg_words = idx_to_word(batch.trg[j], loader.target.vocab)
                    output_words = output[j].max(dim=1)[1]
                    output_words = idx_to_word(output_words, loader.target.vocab)
                    # NOTE(review): get_bleu receives flat word lists here;
                    # confirm that matches what util.bleu.get_bleu expects.
                    bleu = get_bleu(hypotheses=output_words.split(),
                                    reference=trg_words.split())
                except Exception:
                    # Scoring stays best-effort (one bad sentence must not
                    # abort evaluation), but KeyboardInterrupt/SystemExit
                    # are no longer swallowed as with a bare `except:`.
                    continue
                sentence_bleus.append(bleu)

            # Skip batches where nothing could be scored rather than
            # dividing by zero.
            if sentence_bleus:
                batch_bleus.append(sum(sentence_bleus) / len(sentence_bleus))

    mean_bleu = sum(batch_bleus) / len(batch_bleus) if batch_bleus else 0.0
    return epoch_loss / len(iterator), mean_bleu


def run(total_epoch, best_loss):
    """Continue training for ``total_epoch`` epochs, logging loss and BLEU.

    Each epoch trains on ``train_iter``, evaluates on ``valid_iter``, appends
    the results to the module-level ``train_losses``, ``test_losses`` and
    ``bleus`` records, saves the model whenever the validation loss improves,
    and rewrites the three record files under ``result/``.

    Args:
        total_epoch: Number of epochs to run.
        best_loss: Validation loss to beat before a checkpoint is saved.
    """
    for step in range(total_epoch):
        start_time = time.time()
        train_loss = train(model, train_iter, optimizer, criterion, clip)
        valid_loss, bleu = evaluate(model, valid_iter, criterion)
        end_time = time.time()

        # The scheduler is only stepped once the warm-up epochs are over.
        if step > warmup:
            scheduler.step(valid_loss)

        train_losses.append(train_loss)
        test_losses.append(valid_loss)
        bleus.append(bleu)
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), 'saved/model-{0}.pt'.format(valid_loss))

        # Rewrite the full records every epoch; `with` guarantees the files
        # are closed even if a write fails.
        records = (
            ('result/train_loss.txt', train_losses),
            ('result/bleu.txt', bleus),
            ('result/test_loss.txt', test_losses),
        )
        for path, values in records:
            with open(path, 'w') as f:
                f.write(str(values))

        print(f'Epoch: {step + 1} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
        print(f'\tVal Loss: {valid_loss:.3f} | Val PPL: {math.exp(valid_loss):7.3f}')
        print(f'\tBLEU Score: {bleu:.3f}')


if __name__ == '__main__':
Expand Down
48 changes: 1 addition & 47 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from data import *
from models.model.transformer import Transformer
from util.bleu import get_bleu, idx_to_word


def count_parameters(model):
Expand Down Expand Up @@ -69,52 +70,5 @@ def test_model(num_examples):
print('TOTAL BLEU SCORE = {}'.format(batch_bleu))


def bleu_stats(hypothesis, reference):
    """Compute the raw n-gram statistics BLEU is built from.

    Args:
        hypothesis: Sequence of tokens produced by the model.
        reference: Sequence of tokens of the ground truth.

    Returns:
        A list of 10 integers: ``len(hypothesis)``, ``len(reference)``, then
        for each n in 1..4 the number of hypothesis n-grams also found in
        the reference (clipped by reference counts) followed by the total
        number of hypothesis n-grams (never negative).
    """
    stats = [len(hypothesis), len(reference)]
    for n in range(1, 5):
        s_ngrams = Counter(
            tuple(hypothesis[i:i + n]) for i in range(len(hypothesis) + 1 - n)
        )
        r_ngrams = Counter(
            tuple(reference[i:i + n]) for i in range(len(reference) + 1 - n)
        )

        # Counter intersection keeps the minimum count per n-gram, i.e. the
        # clipped match count; it is never negative.
        stats.append(sum((s_ngrams & r_ngrams).values()))
        # Hypotheses shorter than n have no n-grams at all.
        stats.append(max(len(hypothesis) + 1 - n, 0))
    return stats


def bleu(stats):
    """Compute BLEU given n-gram statistics.

    Args:
        stats: Sequence laid out as ``[hyp_len, ref_len, match_1, total_1,
            ..., match_4, total_4]``.

    Returns:
        ``0`` if any statistic is zero; otherwise the brevity penalty times
        the geometric mean of the four n-gram precisions, as a float.
    """
    # Any zero statistic would send a precision's log to -inf (or divide by
    # zero), so the score is defined as 0 in that case.
    if any(x == 0 for x in stats):
        return 0
    c, r = stats[:2]
    log_bleu_prec = sum(
        math.log(float(x) / y) for x, y in zip(stats[2::2], stats[3::2])
    ) / 4.
    # Brevity penalty: only applied (negative exponent) when the hypothesis
    # is shorter than the reference.
    return math.exp(min(0, 1 - float(r) / c) + log_bleu_prec)


def get_bleu(hypotheses, reference):
    """Get validation BLEU score for dev set.

    The n-gram statistics of every (hypothesis, reference) pair are summed
    first and BLEU is computed once on the totals.

    Args:
        hypotheses: Iterable of hypothesis sequences.
        reference: Iterable of reference sequences, paired positionally with
            ``hypotheses`` (extra items on either side are ignored by ``zip``).

    Returns:
        The BLEU score scaled to the 0-100 range.
    """
    # Ten accumulators: two lengths plus (matches, total) for n = 1..4.
    stats = np.zeros(10)
    for hyp, ref in zip(hypotheses, reference):
        stats += np.array(bleu_stats(hyp, ref))
    return 100 * bleu(stats)


def idx_to_word(x, vocab):
    """Convert a sequence of vocabulary indices into a space-joined string.

    Args:
        x: Iterable of indices into ``vocab.itos``.
        vocab: Object whose ``itos`` attribute maps an index to its word.

    Returns:
        The looked-up words joined by single spaces. Any word containing
        ``'<'`` is dropped (presumably special tokens such as ``<pad>`` —
        confirm against the vocabulary).
    """
    words = [word for word in (vocab.itos[i] for i in x) if '<' not in word]
    return " ".join(words)


if __name__ == '__main__':
    # Run the test routine on `batch_size` examples.
    test_model(num_examples=batch_size)
45 changes: 33 additions & 12 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from data import *
from models.model.transformer import Transformer
from util.bleu import idx_to_word, get_bleu
from util.epoch_timer import epoch_time


Expand Down Expand Up @@ -54,17 +55,16 @@ def initialize_weights(m):
def train(model, iterator, optimizer, criterion, clip):
model.train()
epoch_loss = 0

for i, batch in enumerate(iterator):
src = batch.src
trg = batch.trg

optimizer.zero_grad()
output = model(src, trg[:, :-1])
output = output.contiguous().view(-1, output.shape[-1])
output_reshape = output.contiguous().view(-1, output.shape[-1])
trg = trg[:, 1:].contiguous().view(-1)

loss = criterion(output, trg)
loss = criterion(output_reshape, trg)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
Expand All @@ -78,51 +78,72 @@ def train(model, iterator, optimizer, criterion, clip):
def evaluate(model, iterator, criterion):
    """Evaluate ``model`` on ``iterator`` without gradient tracking.

    For every batch the loss is computed on the flattened predictions, and a
    BLEU score is computed per sentence from the highest-scoring token at
    each position. Sentence scores are averaged per batch, and batch scores
    are averaged over the iterator.

    Relies on the module-level names ``batch_size``, ``loader``,
    ``idx_to_word`` and ``get_bleu``.

    Args:
        model: Called as ``model(src, trg[:, :-1])``.
        iterator: Iterable of batches exposing ``.src`` and ``.trg``; must
            support ``len()``.
        criterion: Loss callable taking ``(flattened_output, flattened_trg)``.

    Returns:
        A ``(mean_loss, mean_bleu)`` tuple. ``mean_bleu`` is ``0.0`` when no
        sentence could be scored.
    """
    model.eval()
    epoch_loss = 0
    batch_bleus = []
    with torch.no_grad():
        for batch in iterator:
            src = batch.src
            trg = batch.trg
            # assumes output is (batch, seq_len, vocab) — TODO confirm
            output = model(src, trg[:, :-1])
            output_reshape = output.contiguous().view(-1, output.shape[-1])
            trg = trg[:, 1:].contiguous().view(-1)

            loss = criterion(output_reshape, trg)
            epoch_loss += loss.item()

            # The last batch may hold fewer than `batch_size` sentences, so
            # bound the loop by the real batch dimension instead of relying
            # on an IndexError being swallowed.
            sentence_bleus = []
            for j in range(min(batch_size, output.shape[0])):
                try:
                    trg_words = idx_to_word(batch.trg[j], loader.target.vocab)
                    output_words = output[j].max(dim=1)[1]
                    output_words = idx_to_word(output_words, loader.target.vocab)
                    # NOTE(review): get_bleu receives flat word lists here;
                    # confirm that matches what util.bleu.get_bleu expects.
                    bleu = get_bleu(hypotheses=output_words.split(),
                                    reference=trg_words.split())
                except Exception:
                    # Scoring stays best-effort (one bad sentence must not
                    # abort evaluation), but KeyboardInterrupt/SystemExit
                    # are no longer swallowed as with a bare `except:`.
                    continue
                sentence_bleus.append(bleu)

            # Skip batches where nothing could be scored rather than
            # dividing by zero.
            if sentence_bleus:
                batch_bleus.append(sum(sentence_bleus) / len(sentence_bleus))

    mean_bleu = sum(batch_bleus) / len(batch_bleus) if batch_bleus else 0.0
    return epoch_loss / len(iterator), mean_bleu


def run(total_epoch, best_loss):
    """Train for ``total_epoch`` epochs, logging loss and BLEU per epoch.

    Each epoch trains on ``train_iter``, evaluates on ``valid_iter``, records
    the train loss, validation loss and BLEU score, saves the model whenever
    the validation loss improves, and rewrites the three record files under
    ``result/``.

    Args:
        total_epoch: Number of epochs to run.
        best_loss: Validation loss to beat before a checkpoint is saved.
    """
    train_losses, test_losses, bleus = [], [], []
    for step in range(total_epoch):
        start_time = time.time()
        train_loss = train(model, train_iter, optimizer, criterion, clip)
        valid_loss, bleu = evaluate(model, valid_iter, criterion)
        end_time = time.time()

        # The scheduler is only stepped once the warm-up epochs are over.
        if step > warmup:
            scheduler.step(valid_loss)

        train_losses.append(train_loss)
        test_losses.append(valid_loss)
        bleus.append(bleu)
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), 'saved/model-{0}.pt'.format(valid_loss))

        # Rewrite the full records every epoch; `with` guarantees the files
        # are closed even if a write fails.
        records = (
            ('result/train_loss.txt', train_losses),
            ('result/bleu.txt', bleus),
            ('result/test_loss.txt', test_losses),
        )
        for path, values in records:
            with open(path, 'w') as f:
                f.write(str(values))

        print(f'Epoch: {step + 1} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
        print(f'\tVal Loss: {valid_loss:.3f} | Val PPL: {math.exp(valid_loss):7.3f}')
        print(f'\tBLEU Score: {bleu:.3f}')


if __name__ == '__main__':
Expand Down
Binary file added util/__pycache__/bleu.cpython-36.pyc
Binary file not shown.
Loading

0 comments on commit ded5bcf

Please sign in to comment.