Commit d5a4d5a (1 parent: 1c7984c)
Showing 59 changed files with 617 additions and 437 deletions.
@@ -1,3 +1,4 @@
 ## dataset ignore
 data/
 venv/
+.data/
@@ -0,0 +1,28 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
import torch | ||
|
||
# GPU device setting | ||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") | ||
|
||
# model parameter setting | ||
batch_size = 128 | ||
max_len = 512 | ||
d_model = 512 | ||
sinusoid = True | ||
d_k = d_v = d_model * 4 | ||
n_layers = 6 | ||
n_heads = 8 | ||
drop_prob = 0.1 | ||
|
||
# optimizer parameter setting | ||
warmup = 200 | ||
factor = 0.8 | ||
init_lr = 1e-4 | ||
weight_decay = 5e-4 | ||
epoch = 20 | ||
clip = 1 | ||
inf = float('inf') |
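One reading of these settings (my note, not part of the commit): with d_model = 512 and n_heads = 8, each attention head would work on 512 / 8 = 64 dimensions, while d_model * 4 = 2048 matches the feed-forward hidden size in "Attention Is All You Need", so d_k = d_v here looks like a feed-forward width rather than a per-head key/value size. A quick arithmetic check:

# hedged sanity check on the hyperparameters above (not part of the commit)
d_model, n_heads = 512, 8

d_head = d_model // n_heads   # per-head width in the original paper: 64
ffn_hidden = d_model * 4      # feed-forward hidden size in the paper: 2048

assert d_head == 64 and ffn_hidden == 2048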
@@ -0,0 +1,26 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-29 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
from conf import * | ||
from util.data_loader import DataLoader | ||
from util.tokenizer import Tokenizer | ||
|
||
tokenizer = Tokenizer() | ||
loader = DataLoader(ext=('.en', '.de'), | ||
tokenize_en=tokenizer.tokenize_en, | ||
tokenize_de=tokenizer.tokenize_de, | ||
init_token='<sos>', | ||
eos_token='<eos>') | ||
|
||
train, valid, test = loader.make_dataset() | ||
loader.build_vocab(train_data=train, min_freq=2) | ||
train_iter, valid_iter, test_iter = loader.make_iter(train, valid, test, | ||
batch_size=batch_size, | ||
device=device) | ||
|
||
pad_idx = loader.source.vocab.stoi['<pad>'] | ||
sos_idx = loader.source.vocab.stoi['<sos>'] | ||
enc_voc_size = len(loader.source.vocab) | ||
dec_voc_size = len(loader.target.vocab) |
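A minimal sketch of how these iterators would typically be consumed downstream, assuming torchtext-style Batch objects with .src and .trg fields (an assumption on my part; util/data_loader is not shown in this commit):

# hypothetical usage sketch, assuming torchtext-style batches
for batch in train_iter:
    src = batch.src    # source token indices (tensor layout depends on the loader)
    trg = batch.trg    # target token indices
    print(src.shape, trg.shape, pad_idx, sos_idx)
    break              # smoke test: inspect a single batch only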
@@ -1,5 +1,5 @@
""" | ||
@author : Hyunwoong | ||
@when : 8/21/2019 | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" |
src/model/attention_layers/__init__.py → models/blocks/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -1,5 +1,5 @@
""" | ||
@author : Hyunwoong | ||
@when : 8/21/2019 | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" |
@@ -0,0 +1,29 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-24 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
from torch import nn | ||
|
||
|
||
class Encoder(nn.Module): | ||
|
||
def __init__(self): | ||
super(Encoder, self).__init__() | ||
self.multi_head_attention = None | ||
self.layer_normalization = nn.LayerNorm() | ||
self.feed_forward = None | ||
self.drop_out = nn.Dropout(p=None) | ||
|
||
|
||
def forward(self, x): | ||
shortcut = x | ||
x = self.multi_head_attention(x, x, x) | ||
x += shortcut | ||
x = self.layer_normalization(x) | ||
|
||
shortcut = x | ||
x = self.feed_forward(x) | ||
x += shortcut | ||
x = self.layer_normalization(x) | ||
return x |
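A smoke test of the block, under my assumption above that callers supply d_model and drop_prob; the two placeholder sub-layers are swapped for stand-ins purely so the forward pass runs (my stand-ins, not the commit's):

import torch
from torch import nn

enc = Encoder(d_model=512, drop_prob=0.1)
# stand-ins so the forward pass runs end to end; the real sub-layers come later
enc.multi_head_attention = lambda q, k, v: q
enc.feed_forward = nn.Linear(512, 512)

x = torch.randn(2, 10, 512)   # (batch, seq_len, d_model)
print(enc(x).shape)           # torch.Size([2, 10, 512])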
@@ -1,5 +1,5 @@
""" | ||
@author : Hyunwoong | ||
@when : 8/21/2019 | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" |
@@ -0,0 +1,44 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
|
||
from torch import nn | ||
|
||
from conf import * | ||
|
||
|
||
class PostionalEncoding(nn.Module): | ||
""" | ||
compute sinusoid encoding. | ||
""" | ||
|
||
def __init__(self, d_model, max_len): | ||
""" | ||
constructor of sinusoid encoding class | ||
:param d_model: dimension of model | ||
:param max_len: max sequence length | ||
""" | ||
super(PostionalEncoding, self).__init__() | ||
|
||
# same size with input matrix (for adding with input matrix) | ||
self.encoding = torch.zeros(max_len, d_model, device=conf_device, requires_grad=False) | ||
|
||
pos = torch.arange(0, max_len, device=conf_device) | ||
pos = pos.float().unsqueeze(dim=1) | ||
# 1D => 2D unsqueeze to represent word's position | ||
|
||
_2i = torch.arange(0, d_model, step=2, device=conf_device).float() | ||
# 'i' means index of d_model (e.g. embedding size = 50, 'i' = [0,50]) | ||
# "step=2" means 'i' multiplied with two (same with 2 * i) | ||
|
||
pos = pos / 10000 ** (_2i / d_model) | ||
# compute position information (same with original paper) | ||
|
||
self.encoding[:, 0::2] = torch.sin(pos) # if 'i' is even [0, 2, 4, ... ] => sin | ||
self.encoding[:, 1::2] = torch.cos(pos) # if 'i' is odd [1, 3, 5, ... ] => cos | ||
|
||
def forward(self, x): | ||
return self.encoding[:, :x.size(1)] |
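The table this builds is the sinusoid encoding from "Attention Is All You Need": PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)). A quick check of the output shape and the first row (my verification snippet, assuming the device setting from conf above):

import torch

pe = PostionalEncoding(d_model=512, max_len=512)
dummy_tokens = torch.zeros(2, 10, dtype=torch.long)  # (batch, seq_len)

out = pe(dummy_tokens)
print(out.shape)     # torch.Size([10, 512]): one row per position
print(out[0, :4])    # position 0: sin(0)=0, cos(0)=1 alternating => [0, 1, 0, 1]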
@@ -0,0 +1,22 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-24 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
from torch import nn | ||
|
||
|
||
class TokenEmbedding(nn.Embedding): | ||
""" | ||
Token Embedding using torch.nn | ||
they will dense representation of word using weighted matrix | ||
""" | ||
|
||
def __init__(self, vocab_size, d_model): | ||
""" | ||
class for token embedding that included positional information | ||
:param vocab_size: size of vocabulary | ||
:param d_model: dimensions of model | ||
""" | ||
super(TokenEmbedding, self).__init__(vocab_size, d_model, padding_idx=0) |
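Because padding_idx=0 is passed to nn.Embedding, index 0 always maps to an all-zero vector and receives no gradient updates. A small check (my snippet; note it relies on the vocabulary actually assigning <pad> to index 0, which the loader above does not obviously guarantee):

import torch

emb = TokenEmbedding(vocab_size=100, d_model=8)
tokens = torch.tensor([[0, 5, 7]])  # index 0 playing the role of <pad>

vectors = emb(tokens)
print(vectors[0, 0])                # all zeros: the padding embedding
print(vectors.shape)                # torch.Size([1, 3, 8])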
@@ -0,0 +1,33 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
from torch import nn | ||
|
||
from models.embedding.positional_encoding import PostionalEncoding | ||
from models.embedding.token_embeddings import TokenEmbedding | ||
|
||
|
||
class TransformerEmbedding(nn.Module): | ||
""" | ||
token embedding + positional encoding (sinusoid) | ||
positional encoding can give positional information to network | ||
""" | ||
|
||
def __init__(self, vocab_size, d_model, drop_prob): | ||
""" | ||
class for word embedding that included positional information | ||
:param vocab_size: size of vocabulary | ||
:param d_model: dimensions of model | ||
""" | ||
super(TransformerEmbedding, self).__init__() | ||
self.tok_emb = TokenEmbedding(vocab_size, d_model) | ||
self.pos_emb = PostionalEncoding(d_model) | ||
self.drop_out = nn.Dropout(p=drop_prob) | ||
|
||
def forward(self, x): | ||
embedding = self.tok_emb(x) + self.pos_emb(x) | ||
embedding = self.drop_out(embedding) | ||
return embedding |
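Putting the two embedding pieces together, a brief shape check (my snippet; the vocab size and batch are arbitrary). The (seq_len, d_model) positional rows broadcast against the (batch, seq_len, d_model) token embeddings:

import torch

emb = TransformerEmbedding(vocab_size=1000, d_model=512, drop_prob=0.1)
tokens = torch.randint(0, 1000, (2, 10))  # (batch, seq_len)

out = emb(tokens)
print(out.shape)  # torch.Size([2, 10, 512]): token vectors + positional rows, then dropout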
@@ -1,5 +1,5 @@
""" | ||
@author : Hyunwoong | ||
@when : 8/21/2019 | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" |
@@ -0,0 +1,5 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-25 | ||
@homepage : https://github.com/gusdnd852 | ||
""" |
@@ -0,0 +1,25 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
from torch import nn | ||
|
||
|
||
class ScaleDotProductAttention(nn.Module): | ||
""" | ||
compute scale dot product attention | ||
Query : given sentence that we focused on (decoder) | ||
Key : every sentence to check relationship with Qeury(encoder) | ||
Value : every sentence same with Key (encoder) | ||
""" | ||
|
||
def __init__(self): | ||
super(ScaleDotProductAttention, self).__init__() | ||
self.softmax = nn.Softmax() | ||
self.dropout = nn.Dropout() | ||
|
||
def forward(self, q, k, v, mask=None, drop_prob=0.1): | ||
d_k = k.size() | ||
pass |
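A quick check that the completed forward behaves as expected (my verification; eval() disables the dropout so the output is deterministic given the inputs):

import torch

attn = ScaleDotProductAttention(drop_prob=0.1).eval()
q = k = v = torch.randn(2, 8, 10, 64)  # (batch, heads, seq_len, d_k)

out = attn(q, k, v)
print(out.shape)  # torch.Size([2, 8, 10, 64]): one weighted value sum per query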
@@ -0,0 +1,24 @@
""" | ||
@author : Hyunwoong | ||
@when : 2019-10-22 | ||
@homepage : https://github.com/gusdnd852 | ||
""" | ||
from torch import nn | ||
|
||
from models.embedding.transformer_embedding import TransformerEmbedding | ||
|
||
|
||
class Transformer(nn.Module): | ||
def __init__(self, enc_voc_size, dec_voc_size, d_model, drop_prob): | ||
super(Transformer, self).__init__() | ||
self.enc_embedding = TransformerEmbedding(vocab_size=enc_voc_size, | ||
d_model=d_model, | ||
drop_prob=drop_prob) | ||
|
||
self.dec_embedding = TransformerEmbedding(vocab_size=dec_voc_size, | ||
d_model=d_model, | ||
drop_prob=drop_prob) | ||
|
||
def forward(self, x): | ||
x = self.enc_embedding(x) | ||
return x |
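End to end, the model at this stage only embeds source tokens. A hedged construction using the names from conf and the data script above (the vocab sizes are hypothetical stand-ins for enc_voc_size / dec_voc_size):

import torch

model = Transformer(enc_voc_size=9000, dec_voc_size=7000,
                    d_model=512, drop_prob=0.1)

src = torch.randint(0, 9000, (2, 10))  # (batch, seq_len) source token indices
print(model(src).shape)                # torch.Size([2, 10, 512])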