Commit

implement train & validation code
hyunwoongko committed Oct 28, 2019
1 parent 1c7984c commit d5a4d5a
Showing 59 changed files with 617 additions and 437 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
## dataset ignore
data/
venv/
venv/
.data/
1 change: 1 addition & 0 deletions .idea/dictionaries/User.xml

10 changes: 10 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

2 changes: 1 addition & 1 deletion .idea/misc.xml

166 changes: 154 additions & 12 deletions .idea/workspace.xml

Binary file added __pycache__/conf.cpython-36.pyc
Binary file added __pycache__/data.cpython-36.pyc
28 changes: 28 additions & 0 deletions conf.py
@@ -0,0 +1,28 @@
"""
@author : Hyunwoong
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
import torch

# GPU device setting
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model parameter setting
batch_size = 128
max_len = 512
d_model = 512
sinusoid = True
d_k = d_v = d_model * 4
n_layers = 6
n_heads = 8
drop_prob = 0.1

# optimizer parameter setting
warmup = 200
factor = 0.8
init_lr = 1e-4
weight_decay = 5e-4
epoch = 20
clip = 1
inf = float('inf')
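
The optimizer settings above (warmup, factor, init_lr, weight_decay, clip) are consumed by the training code this commit adds, which is not rendered in the portion of the diff shown here. As a rough, non-authoritative sketch, they could be wired together along these lines (the stand-in model and the choice of scheduler are assumptions):

import torch
from conf import *

model = torch.nn.Linear(d_model, d_model)  # stand-in module, only so the sketch runs

optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, weight_decay=weight_decay)
# assumption: warmup and factor drive some learning-rate schedule; ReduceLROnPlateau is one plausible choice
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor, patience=warmup)

# inside a training loop, after loss.backward():
torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
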
26 changes: 26 additions & 0 deletions data.py
@@ -0,0 +1,26 @@
"""
@author : Hyunwoong
@when : 2019-10-29
@homepage : https://github.com/gusdnd852
"""
from conf import *
from util.data_loader import DataLoader
from util.tokenizer import Tokenizer

tokenizer = Tokenizer()
loader = DataLoader(ext=('.en', '.de'),
tokenize_en=tokenizer.tokenize_en,
tokenize_de=tokenizer.tokenize_de,
init_token='<sos>',
eos_token='<eos>')

train, valid, test = loader.make_dataset()
loader.build_vocab(train_data=train, min_freq=2)
train_iter, valid_iter, test_iter = loader.make_iter(train, valid, test,
batch_size=batch_size,
device=device)

pad_idx = loader.source.vocab.stoi['<pad>']
sos_idx = loader.source.vocab.stoi['<sos>']
enc_voc_size = len(loader.source.vocab)
dec_voc_size = len(loader.target.vocab)
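
util/tokenizer.py and util/data_loader.py are imported above but not rendered in the visible part of this diff. Judging from the calls in data.py, they wrap a torchtext-style pipeline; the sketch below is only a hypothetical reconstruction of that interface (the legacy torchtext Field/Multi30k/BucketIterator API, spaCy tokenizers, and the batch-first layout are all assumptions, and the real helpers may differ):

import spacy
from torchtext.data import Field, BucketIterator
from torchtext.datasets import Multi30k


class Tokenizer:

    def __init__(self):
        self.spacy_en = spacy.load('en_core_web_sm')
        self.spacy_de = spacy.load('de_core_news_sm')

    def tokenize_en(self, text):
        return [tok.text for tok in self.spacy_en.tokenizer(text)]

    def tokenize_de(self, text):
        return [tok.text for tok in self.spacy_de.tokenizer(text)]


class DataLoader:

    def __init__(self, ext, tokenize_en, tokenize_de, init_token, eos_token):
        # ext = ('.en', '.de') -> English source, German target
        self.ext = ext
        self.source = Field(tokenize=tokenize_en, init_token=init_token,
                            eos_token=eos_token, lower=True, batch_first=True)
        self.target = Field(tokenize=tokenize_de, init_token=init_token,
                            eos_token=eos_token, lower=True, batch_first=True)

    def make_dataset(self):
        return Multi30k.splits(exts=self.ext, fields=(self.source, self.target))

    def build_vocab(self, train_data, min_freq):
        self.source.build_vocab(train_data, min_freq=min_freq)
        self.target.build_vocab(train_data, min_freq=min_freq)

    def make_iter(self, train, validate, test, batch_size, device):
        return BucketIterator.splits((train, validate, test),
                                     batch_size=batch_size, device=device)
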
2 changes: 1 addition & 1 deletion src/__init__.py → models/__init__.py
@@ -1,5 +1,5 @@
"""
@author : Hyunwoong
@when : 8/21/2019
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
Binary file added models/__pycache__/__init__.cpython-36.pyc
Binary file added models/__pycache__/transformer.cpython-36.pyc
@@ -1,5 +1,5 @@
"""
@author : Hyunwoong
@when : 8/21/2019
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
@@ -1,6 +1,6 @@
"""
@author : Hyunwoong
@when : 8/26/2019
@when : 2019-10-24
@homepage : https://github.com/gusdnd852
"""
from torch import nn
29 changes: 29 additions & 0 deletions models/blocks/encoder.py
@@ -0,0 +1,29 @@
"""
@author : Hyunwoong
@when : 2019-10-24
@homepage : https://github.com/gusdnd852
"""
from torch import nn

from conf import *


class Encoder(nn.Module):

    def __init__(self):
        super(Encoder, self).__init__()
        # the attention and feed-forward sub-layers are still placeholders at this commit
        self.multi_head_attention = None
        self.layer_normalization = nn.LayerNorm(d_model)
        self.feed_forward = None
        self.drop_out = nn.Dropout(p=drop_prob)

    def forward(self, x):
        # self-attention sub-layer with dropout, residual connection and post-layer norm
        shortcut = x
        x = self.multi_head_attention(x, x, x)
        x = self.layer_normalization(self.drop_out(x) + shortcut)

        # position-wise feed-forward sub-layer with the same residual pattern
        shortcut = x
        x = self.feed_forward(x)
        x = self.layer_normalization(self.drop_out(x) + shortcut)
        return x
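
Both sub-layers above are still None placeholders, so this block cannot run end to end yet. For orientation only, here is a sketch of how such blocks are typically stacked n_layers deep using the value from conf.py (the EncoderStack name and structure are illustrative, not part of this commit):

from torch import nn

from conf import n_layers
from models.blocks.encoder import Encoder


class EncoderStack(nn.Module):

    def __init__(self):
        super(EncoderStack, self).__init__()
        self.layers = nn.ModuleList([Encoder() for _ in range(n_layers)])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)  # each block applies attention + feed-forward with residuals
        return x
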
2 changes: 1 addition & 1 deletion src/model/__init__.py → models/embedding/__init__.py
@@ -1,5 +1,5 @@
"""
@author : Hyunwoong
@when : 8/21/2019
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
Binary file added models/embedding/__pycache__/__init__.cpython-36.pyc
44 changes: 44 additions & 0 deletions models/embedding/positional_encoding.py
@@ -0,0 +1,44 @@
"""
@author : Hyunwoong
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""

from torch import nn

from conf import *


class PostionalEncoding(nn.Module):
"""
compute sinusoid encoding.
"""

def __init__(self, d_model, max_len):
"""
constructor of sinusoid encoding class
:param d_model: dimension of model
:param max_len: max sequence length
"""
super(PostionalEncoding, self).__init__()

# same size with input matrix (for adding with input matrix)
        self.encoding = torch.zeros(max_len, d_model, device=device, requires_grad=False)

        pos = torch.arange(0, max_len, device=device)
pos = pos.float().unsqueeze(dim=1)
# 1D => 2D unsqueeze to represent word's position

        _2i = torch.arange(0, d_model, step=2, device=device).float()
        # 'i' runs over the even indices of d_model (e.g. with embedding size 50: 0, 2, ..., 48)
        # step=2 means each entry already equals 2 * i

pos = pos / 10000 ** (_2i / d_model)
# compute position information (same with original paper)

self.encoding[:, 0::2] = torch.sin(pos) # if 'i' is even [0, 2, 4, ... ] => sin
self.encoding[:, 1::2] = torch.cos(pos) # if 'i' is odd [1, 3, 5, ... ] => cos

    def forward(self, x):
        # x holds token ids of shape (batch_size, seq_len); return the first seq_len rows,
        # shaped (seq_len, d_model), so the table broadcasts over the batch dimension
        return self.encoding[:x.size(1), :]
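
A quick shape check of the sinusoid table, using the values from conf.py and the row-wise slicing above (a usage sketch, not part of the commit):

import torch
from conf import d_model, max_len, device
from models.embedding.positional_encoding import PostionalEncoding

pe = PostionalEncoding(d_model, max_len)
dummy_ids = torch.zeros(2, 30, dtype=torch.long, device=device)  # 2 sentences, 30 tokens each
print(pe(dummy_ids).shape)  # torch.Size([30, 512]); broadcasts against (2, 30, 512) token embeddings
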
22 changes: 22 additions & 0 deletions models/embedding/token_embeddings.py
@@ -0,0 +1,22 @@
"""
@author : Hyunwoong
@when : 2019-10-24
@homepage : https://github.com/gusdnd852
"""
from torch import nn


class TokenEmbedding(nn.Embedding):
"""
Token Embedding using torch.nn
    maps token ids to dense word representations through a learned weight matrix
"""

def __init__(self, vocab_size, d_model):
"""
class for token embedding that included positional information
:param vocab_size: size of vocabulary
:param d_model: dimensions of model
"""
super(TokenEmbedding, self).__init__(vocab_size, d_model, padding_idx=0)
33 changes: 33 additions & 0 deletions models/embedding/transformer_embedding.py
@@ -0,0 +1,33 @@
"""
@author : Hyunwoong
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
from torch import nn

from conf import max_len

from models.embedding.positional_encoding import PostionalEncoding
from models.embedding.token_embeddings import TokenEmbedding


class TransformerEmbedding(nn.Module):
"""
token embedding + positional encoding (sinusoid)
positional encoding can give positional information to network
"""

def __init__(self, vocab_size, d_model, drop_prob):
"""
class for word embedding that included positional information
:param vocab_size: size of vocabulary
        :param d_model: dimensions of model
        :param drop_prob: dropout rate applied to the summed embedding
"""
super(TransformerEmbedding, self).__init__()
self.tok_emb = TokenEmbedding(vocab_size, d_model)
        self.pos_emb = PostionalEncoding(d_model, max_len)  # max_len comes from conf.py
self.drop_out = nn.Dropout(p=drop_prob)

def forward(self, x):
embedding = self.tok_emb(x) + self.pos_emb(x)
embedding = self.drop_out(embedding)
return embedding
@@ -1,5 +1,5 @@
"""
@author : Hyunwoong
@when : 8/21/2019
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
5 changes: 5 additions & 0 deletions models/layers/multi_head_attention.py
@@ -0,0 +1,5 @@
"""
@author : Hyunwoong
@when : 2019-10-25
@homepage : https://github.com/gusdnd852
"""
25 changes: 25 additions & 0 deletions models/layers/scale_dot_product_attention.py
@@ -0,0 +1,25 @@
"""
@author : Hyunwoong
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
import math

from torch import nn

from conf import drop_prob


class ScaleDotProductAttention(nn.Module):
    """
    compute scaled dot-product attention
    Query : given sentence that we focused on (decoder)
    Key : every sentence to check relationship with Query (encoder)
    Value : every sentence same with Key (encoder)
    """

    def __init__(self):
        super(ScaleDotProductAttention, self).__init__()
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, q, k, v, mask=None):
        # standard formulation: softmax(q @ k^T / sqrt(d_k)) @ v
        d_k = k.size(-1)
        score = q @ k.transpose(-2, -1) / math.sqrt(d_k)

        if mask is not None:
            score = score.masked_fill(mask == 0, float('-inf'))

        attention = self.dropout(self.softmax(score))
        # return both the weighted values and the attention map
        return attention @ v, attention
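
A shape check for the completed forward pass above, using the conf.py values (the per-head width d_model // n_heads is a conventional choice, not something this commit fixes):

import torch
from conf import batch_size, n_heads, d_model
from models.layers.scale_dot_product_attention import ScaleDotProductAttention

attention = ScaleDotProductAttention()
q = k = v = torch.rand(batch_size, n_heads, 32, d_model // n_heads)
out, score = attention(q, k, v)
print(out.shape, score.shape)  # (128, 8, 32, 64) and (128, 8, 32, 32)
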
24 changes: 24 additions & 0 deletions models/transformer.py
@@ -0,0 +1,24 @@
"""
@author : Hyunwoong
@when : 2019-10-22
@homepage : https://github.com/gusdnd852
"""
from torch import nn

from models.embedding.transformer_embedding import TransformerEmbedding


class Transformer(nn.Module):
def __init__(self, enc_voc_size, dec_voc_size, d_model, drop_prob):
super(Transformer, self).__init__()
self.enc_embedding = TransformerEmbedding(vocab_size=enc_voc_size,
d_model=d_model,
drop_prob=drop_prob)

self.dec_embedding = TransformerEmbedding(vocab_size=dec_voc_size,
d_model=d_model,
drop_prob=drop_prob)

def forward(self, x):
x = self.enc_embedding(x)
return x
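
At this commit the forward pass only embeds the encoder input. A usage sketch tying the model to conf.py and data.py (assumes the fixes applied to the embedding modules above; importing data triggers the dataset and vocabulary build):

import torch
from conf import *
from data import enc_voc_size, dec_voc_size
from models.transformer import Transformer

model = Transformer(enc_voc_size=enc_voc_size,
                    dec_voc_size=dec_voc_size,
                    d_model=d_model,
                    drop_prob=drop_prob).to(device)

dummy_src = torch.randint(0, enc_voc_size, (batch_size, 30), device=device)  # fake token ids
print(model(dummy_src).shape)  # torch.Size([128, 30, 512]): embedded source only, no encoder/decoder stacks yet
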
Binary file removed src/__pycache__/__init__.cpython-36.pyc
Binary file removed src/model/__pycache__/__init__.cpython-36.pyc
Binary file removed src/model/__pycache__/transformer.cpython-36.pyc
63 changes: 0 additions & 63 deletions src/model/attention_layers/multi_head_attention.py

This file was deleted.


0 comments on commit d5a4d5a
