Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #72

Merged
merged 14 commits into from
Nov 24, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Implement workflow for training with multiple gpus
  • Loading branch information
jmisilo committed Nov 15, 2022
commit 311dec3e0849d1d674978c4bacb87e9e1803a747
17 changes: 17 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# for tests on Linux machines
FROM python:3.9.13

# Set the working directory to /app
WORKDIR /app

# Create the virtual environment that the build steps below and the
# container's CMD are meant to use.
RUN python -m venv venv

# Every RUN instruction starts a fresh shell, so sourcing venv/bin/activate
# in its own RUN step does not carry over to later layers or to CMD.
# Prepending the venv's bin directory to PATH makes `python` and `pip`
# resolve to the venv for the rest of the build and at runtime.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="/app/venv/bin:$PATH"

# Copy only the requirements file first so the dependency layer below is
# rebuilt only when requirements.txt changes.
COPY requirements.txt /app/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Copy the application sources last; they change most often.
COPY /src /app/src

# -u keeps stdout/stderr unbuffered so training logs appear immediately.
CMD ["python", "-u", "src/training.py"]
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ matplotlib==3.6.0
numpy==1.23.3
pandas==1.5.0
Pillow==9.3.0
torch==1.12.1+cu116
torch==1.12.1
# torch==1.12.1+cu116
tqdm==4.64.1
transformers==4.22.1
wandb==0.13.4
12 changes: 12 additions & 0 deletions src/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* get_loader returns DataLoader object.
'''

import os
import pickle

import numpy as np
Expand All @@ -14,8 +15,19 @@
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer

from utils import download_dataset

class MiniFlickrDataset(Dataset):
def __init__(self, path):
# check whether the dataset file already exists at the given path
if not os.path.isfile(path):
print('Dataset file not found. Downloading...')

# create data directory and in it create processed directory
os.makedirs(os.path.dirname(path), exist_ok=True)
# download dataset
download_dataset(path)

with open(path, 'rb') as f:
self.data = pickle.load(f)

Expand Down
4 changes: 2 additions & 2 deletions src/dataset_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CLIP model and processor
preprocessor = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch14')
model = CLIPModel.from_pretrained('openai/clip-vit-base-patch14').vision_model.to(device)
preprocessor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
model = CLIPModel.from_pretrained('openai/clip-vit-large-patch14').vision_model.to(device)

# Load dataset
df = pd.read_csv(os.path.join(DATA_PATH, 'raw', 'results.csv'), sep='|')
Expand Down
64 changes: 48 additions & 16 deletions src/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def __init__(self, device='cpu'):

self.device = device

self.preprocessor = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch14')
self.model = CLIPModel.from_pretrained('openai/clip-vit-base-patch14').vision_model.to(self.device)
self.preprocessor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
self.model = CLIPModel.from_pretrained('openai/clip-vit-large-patch14').vision_model.to(self.device)

def forward(self, image):
# only one image at a time
Expand All @@ -54,7 +54,8 @@ def __init__(
self,
ep_len,
num_layers,
embed_size,
embed_size_inp,
embed_size_out,
n_heads,
forward_expansion,
dropout,
Expand All @@ -63,35 +64,56 @@ def __init__(
super(Mapping, self).__init__()

self.ep_len = ep_len
self.embed_size = embed_size
self.embed_size_inp = embed_size_inp
self.embed_size_out = embed_size_out

self.device = device

num_layers_inp = num_layers // 2
num_layers_out = num_layers - num_layers_inp

self.transformer_encoder = nn.TransformerEncoder(
nn.TransformerEncoderLayer(
d_model=embed_size,
d_model=self.embed_size_inp,
nhead=n_heads,
dim_feedforward=self.embed_size_inp*forward_expansion,
dropout=dropout,
batch_first=True,
device=device
),
num_layers=num_layers_inp
).to(self.device)

self.translator = nn.Linear(self.embed_size_inp, self.embed_size_out).to(self.device)

self.transformer_decoder = nn.TransformerEncoder(
nn.TransformerEncoderLayer(
d_model=self.embed_size_out,
nhead=n_heads,
dim_feedforward=embed_size*forward_expansion,
dim_feedforward=self.embed_size_out*forward_expansion,
dropout=dropout,
batch_first=True,
device=device
),
num_layers=num_layers
num_layers=num_layers_out
).to(self.device)

self.mapper = nn.Linear(embed_size, ep_len * embed_size).to(self.device)
self.mapper = nn.Linear(self.embed_size_out, ep_len * self.embed_size_out).to(self.device)

self.init_weights()

def forward(self, img_embedded, train_mode=False):
x = self.transformer_encoder(img_embedded)
x = self.translator(x)

x = self.transformer_decoder(x)
x = self.mapper(x)

x = x.view(
*(
[-1, self.ep_len, self.embed_size]
[-1, self.ep_len, self.embed_size_out]
if train_mode else
[self.ep_len, self.embed_size]
[self.ep_len, self.embed_size_out]
)
) # for batched input

Expand All @@ -117,10 +139,10 @@ def __init__(self, device='cpu'):

self.device = device

self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2-xl')
self.tokenizer.pad_token = self.tokenizer.eos_token

self.model = GPT2LMHeadModel.from_pretrained('gpt2').to(self.device)
self.model = GPT2LMHeadModel.from_pretrained('gpt2-xl').to(self.device)
self.vocab_size = self.model.config.vocab_size

def forward(self, embedding, attention_mask=None):
Expand All @@ -146,15 +168,25 @@ def __init__(self, ep_len, num_layers, n_heads, forward_expansion, dropout, max_
'''
super(Net, self).__init__()

assert num_layers >= 2, 'Number of layers must be at least 2.'

self.device = device
self.ep_len = ep_len

self.ie = ImageEncoder(device=device)
self.mp = Mapping(ep_len=self.ep_len, num_layers=num_layers, embed_size=self.ie.model.config.hidden_size, n_heads=n_heads, forward_expansion=forward_expansion, dropout=dropout, device=device)
self.td = TextDecoder(device=device)

assert self.ie.model.config.hidden_size == self.td.model.config.n_embd, "Embedding size of models mismatch"

self.mp = Mapping(
ep_len=self.ep_len,
num_layers=num_layers,
embed_size_inp=self.ie.model.config.hidden_size,
embed_size_out=self.td.model.config.hidden_size,
n_heads=n_heads,
forward_expansion=forward_expansion,
dropout=dropout,
device=device
)

self.max_len = max_len

self.criterion = nn.CrossEntropyLoss(ignore_index=self.td.tokenizer.pad_token_id)
Expand Down Expand Up @@ -270,7 +302,7 @@ def train_forward(self, img_emb, trg_cap, att_mask):

m.train()
N = 10
emb = 768
emb = 1024
length = 20

l = m.train_forward(
Expand Down
9 changes: 4 additions & 5 deletions src/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ def main(rank, world_size, config, ckp_name=''):

trainer.train_epoch()
trainer.valid_epoch()
trainer.test_result()

metadata = trainer.get_training_data()

Expand All @@ -130,21 +129,21 @@ def main(rank, world_size, config, ckp_name=''):
'train_loss': metadata['train_loss'],
'valid_loss': metadata['valid_loss'],
'lr': metadata['lr'],
'examples': wandb.Image(metadata['examples'])
})

if not os.path.exists(config.weights_dir):
os.makedirs(config.weights_dir)

if (epoch + 1) % 10 == 0 and rank == 0:
if (epoch + 1) % 50 == 0 and rank == 0:
trainer.save_ckp(os.path.join(config.weights_dir, f'epoch_{epoch + 1}.pt'))

ddp_cleanup()


if __name__ == '__main__':
# check if there is no GPU - use CPU -> world_size = 1

# check if there is no GPU - use CPU -> world_size = 1
world_size = torch.cuda.device_count() if torch.cuda.is_available() else 1

print(f'Number of GPUs: {world_size}')

mp.spawn(main, args=(world_size, config, ''), nprocs=world_size)
2 changes: 1 addition & 1 deletion src/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from utils.config import *
from utils.download_weights import *
from utils.downloads import *
from utils.lr_warmup import *
12 changes: 0 additions & 12 deletions src/utils/download_weights.py

This file was deleted.

19 changes: 19 additions & 0 deletions src/utils/downloads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
'''
Utility functions for downloading model weights and the dataset.
'''

import gdown

def download_weights(checkpoint_fpath):
    '''
    Fetches the model checkpoint from Google Drive.

    checkpoint_fpath: path under which the downloaded file is saved.
    '''

    weights_url = 'https://drive.google.com/uc?id=10ieSMMJzE9EeiPIF3CMzeT4timiQTjHV'

    # quiet=False is passed through to gdown unchanged.
    gdown.download(weights_url, checkpoint_fpath, quiet=False)

def download_dataset(destination_path):
    '''
    Fetches the dataset file from Google Drive.

    destination_path: path under which the downloaded file is saved.
    '''

    dataset_url = 'https://drive.google.com/uc?id=1E7lKanGE2Gakgy3mvyUal_B43BxU3vHr'

    # quiet=False is passed through to gdown unchanged.
    gdown.download(dataset_url, destination_path, quiet=False)