
Commit

implement encoder
hyunwoongko committed Dec 17, 2019
1 parent 64e83bc commit 10c0a9d
Showing 21 changed files with 175 additions and 28 deletions.
20 changes: 16 additions & 4 deletions .idea/workspace.xml


Binary file modified __pycache__/conf.cpython-36.pyc
Binary file modified __pycache__/train.cpython-36.pyc
3 changes: 2 additions & 1 deletion conf.py
@@ -9,13 +9,14 @@
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

 # model parameter setting
-batch_size = 128
+batch_size = 32
 max_len = 512
 d_model = 512
 sinusoid = True
 d_k = d_v = d_model * 4
 n_layers = 6
 n_heads = 8
+ffn_hidden = 256
 drop_prob = 0.1

 # optimizer parameter setting
Binary file modified models/__pycache__/transformer.cpython-36.pyc
Binary file added models/blocks/__pycache__/__init__.cpython-36.pyc
Binary file added models/blocks/__pycache__/encoder.cpython-36.pyc
33 changes: 19 additions & 14 deletions models/blocks/encoder.py
@@ -5,25 +5,30 @@
"""
from torch import nn

from models.layers.layer_norm import LayerNorm
from models.layers.multi_head_attention import MultiHeadAttention
from models.layers.position_wise_feed_forward import PositionwiseFeedForward


class Encoder(nn.Module):

def __init__(self):
def __init__(self, d_model, ffn_hidden, n_head, drop_prob):
super(Encoder, self).__init__()
self.multi_head_attention = None
self.layer_normalization = nn.LayerNorm()
self.feed_forward = None
self.drop_out = nn.Dropout(p=None)

self.attention = MultiHeadAttention(d_model=d_model, n_head=n_head)
self.norm = LayerNorm(d_model=d_model)
self.feed_forward = PositionwiseFeedForward(d_model=d_model, hidden=ffn_hidden)
self.drop_out = nn.Dropout(p=drop_prob)

def forward(self, x):
shortcut = x
x = self.multi_head_attention(x, x, x)
x += shortcut
x = self.layer_normalization(x)
_x = x
x = self.attention(x, x, x)
x += x
x = self.norm(x)

shortcut = x
_x = x
x = self.feed_forward(x)
x += shortcut
x = self.layer_normalization(x)
return x
x += x
x = self.norm(x)

out = self.drop_out(x)
return out
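
As a quick sanity check (not part of this commit), the new Encoder block can be exercised on random inputs. The hyperparameters mirror the conf.py values above, the sequence length of 128 is arbitrary, and the import path assumes the repository layout shown in this diff.

import torch

from models.blocks.encoder import Encoder

# one encoder block with settings that mirror conf.py
block = Encoder(d_model=512, ffn_hidden=256, n_head=8, drop_prob=0.1)

x = torch.randn(32, 128, 512)  # [batch_size, length, d_model]
out = block(x)
print(out.shape)               # torch.Size([32, 128, 512]): the shape is preserved
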
Binary file modified models/embedding/__pycache__/positional_encoding.cpython-36.pyc
Binary file added models/layers/__pycache__/__init__.cpython-36.pyc
Binary file added models/layers/__pycache__/layer_norm.cpython-36.pyc
24 changes: 24 additions & 0 deletions models/layers/layer_norm.py
@@ -0,0 +1,24 @@
"""
@author : Hyunwoong
@when : 2019-12-18
@homepage : https://github.com/gusdnd852
"""
import torch
from torch import nn


class LayerNorm(nn.Module):
def __init__(self, d_model, eps=1e-12):
super(LayerNorm, self).__init__()
self.gamma = nn.Parameter(torch.ones(d_model))
self.beta = nn.Parameter(torch.zeros(d_model))
self.eps = eps

def forward(self, x):
mean = x.mean(-1, keepdim=True)
std = x.std(-1, keepdim=True)

out = self.gamma * (x - mean)
out /= (std + self.eps)
out += self.beta
return out
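
For reference (not part of the commit), the hand-rolled LayerNorm above should track torch.nn.LayerNorm closely; the differences are the unbiased std used by torch.std and adding eps to the std instead of the variance, so the two agree approximately rather than exactly.

import torch
from torch import nn

from models.layers.layer_norm import LayerNorm  # the module added in this commit

x = torch.randn(2, 4, 512)
custom = LayerNorm(d_model=512)
builtin = nn.LayerNorm(512, eps=1e-12)

# agreement up to the unbiased-vs-biased std difference (about 0.1% for d_model=512)
print(torch.allclose(custom(x), builtin(x), atol=1e-2))  # True
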
58 changes: 58 additions & 0 deletions models/layers/multi_head_attention.py
@@ -3,3 +3,61 @@
@when : 2019-10-25
@homepage : https://github.com/gusdnd852
"""
from torch import nn

from models.layers.scale_dot_product_attention import ScaleDotProductAttention


class MultiHeadAttention(nn.Module):

    def __init__(self, d_model, n_head):
        super(MultiHeadAttention, self).__init__()
        self.n_head = n_head
        self.attention = ScaleDotProductAttention()
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        # 1. dot product with weight matrices
        q, k, v = self.w_q(q), self.w_k(k), self.w_v(v)

        # 2. split tensor by number of heads
        q, k, v = self.project(q), self.project(k), self.project(v)

        # 3. do scale dot product to compute similarity
        out, attention = self.attention(q, k, v, mask=mask)
        out = self.concat(out)

        # 4. visualize attention map
        # TODO : we should implement visualization

        return out

    def project(self, tensor):
        """
        split tensor by number of heads
        :param tensor: [batch_size, length, d_model]
        :return: [batch_size, head, length, d_tensor]
        """
        batch_size, length, d_model = tensor.size()

        d_tensor = d_model // self.n_head
        tensor = tensor.view(batch_size, self.n_head, length, d_tensor)
        # it is similar to group convolution (split by number of heads)

        return tensor

    def concat(self, tensor):
        """
        inverse function of self.project(tensor : torch.Tensor)
        :param tensor: [batch_size, head, length, d_tensor]
        :return: [batch_size, length, d_model]
        """
        batch_size, head, length, d_tensor = tensor.size()
        d_model = head * d_tensor

        tensor = tensor.view(batch_size, length, d_model)
        return tensor
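
A minimal shape check for the attention module above (again, not part of the commit). It assumes d_model is divisible by n_head, since project() splits the model dimension into n_head chunks of size d_tensor.

import torch

from models.layers.multi_head_attention import MultiHeadAttention

mha = MultiHeadAttention(d_model=512, n_head=8)

q = k = v = torch.randn(32, 128, 512)  # [batch_size, length, d_model]
out = mha(q, k, v)                     # no mask
print(out.shape)                       # torch.Size([32, 128, 512])
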
21 changes: 21 additions & 0 deletions models/layers/position_wise_feed_forward.py
@@ -0,0 +1,21 @@
"""
@author : Hyunwoong
@when : 2019-12-18
@homepage : https://github.com/gusdnd852
"""
from torch import nn


class PositionwiseFeedForward(nn.Module):

def __init__(self, d_model, hidden):
super(PositionwiseFeedForward, self).__init__()
self.linear1 = nn.Linear(d_model, hidden)
self.linear2 = nn.Linear(hidden, d_model)
self.relu = nn.ReLU()

def forward(self, x):
x = self.linear1(x)
x = self.relu(x)
x = self.linear2(x)
return x
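
One way to see that this block is genuinely position-wise (illustration only, not in the commit): running it on a single time step gives the same result as slicing that step out of the full output, because both linear layers act on the last dimension only.

import torch

from models.layers.position_wise_feed_forward import PositionwiseFeedForward

ffn = PositionwiseFeedForward(d_model=512, hidden=256)
x = torch.randn(32, 128, 512)

full = ffn(x)              # transform every position
single = ffn(x[:, :1, :])  # transform only the first position
print(torch.allclose(full[:, :1, :], single))  # True: positions are independent
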
25 changes: 22 additions & 3 deletions models/layers/scale_dot_product_attention.py
@@ -3,6 +3,8 @@
 @when : 2019-10-22
 @homepage : https://github.com/gusdnd852
 """
+import math
+
 from torch import nn


@@ -20,6 +22,23 @@ def __init__(self):
         self.softmax = nn.Softmax()
         self.dropout = nn.Dropout()

-    def forward(self, q, k, v, mask=None, drop_prob=0.1):
-        d_k = k.size()
-        pass
+    def forward(self, q, k, v, mask=None, e=1e-12):
+        # input is a 4-dimensional tensor
+        # [batch_size, head, length, d_tensor]
+        batch_size, head, length, d_tensor = k.size()
+        d_model = head * d_tensor
+
+        # 1. dot product Query with Key^T to compute similarity
+        k_t = k.view(batch_size, head, d_tensor, length)
+        score = (q @ k_t) / math.sqrt(d_model)
+
+        # 2. apply masking (optional)
+        if mask is not None: score = score.masked_fill(mask == 0, -e)
+
+        # 3. pass them to softmax to map into the [0, 1] range
+        score = self.softmax(score)
+
+        # 4. multiply with Value
+        v = score @ v
+
+        return v, score
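
The forward pass above computes softmax(Q K^T / sqrt(d_model)) V on [batch_size, head, length, d_tensor] tensors (note it scales by d_model = head * d_tensor rather than the per-head d_tensor). A direct call on random tensors, not part of the commit:

import torch

from models.layers.scale_dot_product_attention import ScaleDotProductAttention

attention = ScaleDotProductAttention()

# [batch_size, head, length, d_tensor], as documented in forward()
q = k = v = torch.randn(32, 8, 128, 64)
out, score = attention(q, k, v)
print(out.shape)    # torch.Size([32, 8, 128, 64])
print(score.shape)  # torch.Size([32, 8, 128, 128]): a length-by-length attention map
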
16 changes: 10 additions & 6 deletions models/transformer.py
@@ -5,24 +5,28 @@
"""
from torch import nn

from models.blocks.encoder import Encoder
from models.embedding.transformer_embedding import TransformerEmbedding


class Transformer(nn.Module):
def __init__(self, enc_voc_size, dec_voc_size, d_model, max_len, drop_prob, device):
def __init__(self, enc_voc_size, dec_voc_size, d_model,
ffn_hidden, n_layers, n_head, max_len, drop_prob, device):
super(Transformer, self).__init__()
self.enc_embedding = TransformerEmbedding(vocab_size=enc_voc_size,
d_model=d_model,
max_len=max_len,
drop_prob=drop_prob,
device=device)

self.dec_embedding = TransformerEmbedding(vocab_size=dec_voc_size,
d_model=d_model,
max_len=max_len,
drop_prob=drop_prob,
device=device)
self.encoders = nn.Sequential(*[Encoder(d_model=d_model,
drop_prob=drop_prob,
ffn_hidden=ffn_hidden,
n_head=n_head) for _ in range(n_layers)])

def forward(self, source, target):
source = self.enc_embedding(source)
source = self.encoders(source)
print(source.size())

return source
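
At this commit the model is encoder-only, so a toy forward pass just embeds the source and runs it through the encoder stack. The sketch below is not part of the commit: the vocabulary sizes and token ids are made up, and it assumes the TransformerEmbedding from earlier commits accepts [batch_size, length] token-id tensors.

import torch

from models.transformer import Transformer

device = torch.device("cpu")
model = Transformer(enc_voc_size=100, dec_voc_size=100, d_model=512,
                    ffn_hidden=256, n_layers=6, n_head=8,
                    max_len=512, drop_prob=0.1, device=device)

src = torch.randint(0, 100, (32, 128))  # [batch_size, length] token ids
trg = torch.randint(0, 100, (32, 128))  # unused for now: only the encoder exists
out = model(src, trg)                   # forward() prints torch.Size([32, 128, 512])
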
3 changes: 3 additions & 0 deletions train.py
@@ -18,6 +18,9 @@
                     enc_voc_size=enc_voc_size,
                     dec_voc_size=dec_voc_size,
                     max_len=max_len,
+                    ffn_hidden=ffn_hidden,
+                    n_head=n_heads,
+                    n_layers=n_layers,
                     drop_prob=drop_prob,
                     device=device).to(device)

