This repository has been archived by the owner on Aug 11, 2022. It is now read-only.

Commit

samples
vpj committed May 18, 2022
1 parent c5f0a48 commit 0548572
Showing 4 changed files with 120 additions and 8 deletions.
8 changes: 5 additions & 3 deletions src/neox/samples/fine_tune_biases.py
@@ -21,7 +21,7 @@

from labml import tracker, experiment, monit
from neox.data import get_training_data
-from neox.utils import load_layers, balance_layers
+from neox.utils import balance_layers, LayerGenerator
from neox.utils.training import train, get_trainable_params, train_biases_only

# List of layers to load. This is used for testing.
@@ -39,7 +39,9 @@ def main():
experiment.create(name='finetune_neox_biases', comment='Pipeline parallel', writers={'screen', 'web_api'})

# Load layers
-layers = load_layers(LAYERS)
+layers = list(LayerGenerator(is_clone_layers=True,
+                             filter_layers=LAYERS,
+                             ).load())

# Mark `requires_grad=True` for biases using a [helper function](../utils/training.html).
train_biases_only(layers)
@@ -67,7 +69,7 @@ def main():
sampler=RandomSampler(dataset, replacement=True))

# Initialize optimizer
-optimizer = optim.Adam(get_trainable_params(pipe_model), lr=1e-6)
+optimizer = optim.Adam(get_trainable_params(pipe_model), lr=3e-4)

# Train the model using the [helper function](../utils/training.html)
with experiment.start():
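For reference, a minimal sketch of what bias-only fine-tuning amounts to for a generic stack of PyTorch layers. The two helpers below are illustrative stand-ins, not the repository's actual train_biases_only / get_trainable_params implementations, and they assume bias parameters are named with a trailing 'bias', as they are for standard nn.Linear and nn.LayerNorm modules.

from torch import nn

def train_biases_only_sketch(layers):
    # Freeze everything, then re-enable gradients for bias parameters only.
    for layer in layers:
        for name, param in layer.named_parameters():
            param.requires_grad = name.endswith('bias')

def get_trainable_params_sketch(model: nn.Module):
    # Hand the optimizer only the parameters that will receive gradients,
    # so frozen weights carry no optimizer state.
    return [p for p in model.parameters() if p.requires_grad]

With helpers like these, optim.Adam(get_trainable_params_sketch(model), lr=3e-4) updates only the biases, which keeps optimizer memory small while still adapting the model.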
101 changes: 101 additions & 0 deletions src/neox/samples/generating_gpu.py
@@ -0,0 +1,101 @@
"""
---
title: Generate Text with GPT-NeoX by Evaluating Layer by Layer
summary: >
Generate Text with GPT-NeoX by evaluating layer by layer
---
# Generate Text with GPT-NeoX by Evaluating Layer by Layer
This shows how to generate text from GPT-NeoX with a single GPU.
This needs a GPU with more than 45GB of memory.
"""

# Imports
from typing import List

import torch
from torch import nn

from labml import monit
from neox.utils import get_tokens, print_tokens, LayerGenerator
from neox.utils.cache import get_cache

# List of layers to load. This is used for testing.
# You can assign a subset of layers like `{0, 1}` so that it only loads
# the first two transformer layers.
LAYERS = None

# Prompt to complete
PROMPT = 'Einstein was born in the German Empire, but moved to Switzerland in 1895, forsaking his German'


def infer(model: nn.Module, ids: List[int], device: torch.device):
    """
    ### Predict the next token

    :param model: is the model (the layers wrapped in an `nn.Sequential`)
    :param ids: are the input token ids
    :param device: is the device of the model
    """

    with torch.no_grad():
        # Get the tokens
        x = torch.tensor(ids)[None, :].to(device)
        # Eval model
        x = model(x)

    # Return predicted token
    return x[0].max(dim=-1)[1].tolist()


def generate():
    """
    ## Generate text
    """

    # Setup [cache](../utils/cache.html) to cache intermediate key/value pairs for faster generation
    cache = get_cache()
    cache.set('use_cache', True)

    # Device
    device = torch.device('cuda:0')

    # Load layers
    layers = list(LayerGenerator(is_clone_layers=True,
                                 filter_layers=LAYERS,
                                 dtype=torch.float16,
                                 device=device,
                                 ).load())

    model = nn.Sequential(*layers)

    # Get token ids
    ids = get_tokens(PROMPT)

    # Run the model
    cache.set('state_ids', (None, 1))
    with monit.section('Infer'):
        next_token = infer(model, ids, device)[-1]

    # Append the predicted token
    ids += [next_token]

    # Predict 100 tokens
    for i in range(1, 100):
        # Set the state to use cached activations
        cache.set('state_ids', (i, i + 1))
        # Get next token. Note that we only feed the last token to the model because
        # we cache the key/value pairs of previous tokens.
        with monit.section('Infer'):
            next_token = infer(model, [next_token], device)[-1]
        # Append the predicted token
        ids += [next_token]
        # Print
        print_tokens(ids, [ids])


#
if __name__ == '__main__':
    generate()
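The loop above feeds only the newest token back into the model because the layers cache the key/value pairs of earlier tokens (controlled here through cache.set('state_ids', ...)). The real cache lives in neox/utils/cache.py and is not shown in this diff; the following is only a self-contained illustration of the general pattern an attention layer uses to reuse cached keys and values:

import torch
from torch import nn

class CachedSelfAttention(nn.Module):
    # Illustrative single-head attention that appends new keys/values to a cache.
    def __init__(self, d_model: int):
        super().__init__()
        self.qkv = nn.Linear(d_model, 3 * d_model)
        self.k_cache = None
        self.v_cache = None

    def forward(self, x: torch.Tensor):
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        if self.k_cache is not None:
            # Reuse keys/values computed for earlier tokens, so only the new
            # token's projections need to be computed on this step.
            k = torch.cat([self.k_cache, k], dim=1)
            v = torch.cat([self.v_cache, v], dim=1)
        self.k_cache, self.v_cache = k, v
        attn = torch.softmax(q @ k.transpose(-2, -1) / k.shape[-1] ** 0.5, dim=-1)
        return attn @ v

This is why, after the first full pass over the prompt, the script can call infer(model, [next_token], device) with a single token id.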
6 changes: 4 additions & 2 deletions src/neox/samples/generating_pipe.py
@@ -18,7 +18,7 @@
from torch import nn

from labml import monit
-from neox.utils import load_layers, get_tokens, print_tokens, balance_layers
+from neox.utils import load_layers, get_tokens, print_tokens, balance_layers, LayerGenerator
from neox.utils.cache import get_cache

# List of layers to load. This is used for testing.
@@ -58,7 +58,9 @@ def generate():
cache.set('use_cache', True)

# Load layers
-layers = load_layers(LAYERS)
+layers = list(LayerGenerator(is_clone_layers=True,
+                             filter_layers=LAYERS,
+                             ).load())

# Create pipeline parallel model
with monit.section('Pipe'):
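The pipeline-parallel model itself is built inside monit.section('Pipe'), which this diff does not show. As a rough sketch of the idea, assuming fairscale's Pipe is used to spread the layers over several GPUs (build_pipe_sketch below is an illustrative helper, not the repository's balance_layers or pipeline setup):

import torch
from torch import nn
from fairscale.nn import Pipe  # assumption: fairscale provides the pipeline wrapper

def build_pipe_sketch(layers, n_gpus: int, chunks: int = 8):
    # Split the layer list as evenly as possible across the GPUs.
    n = len(layers)
    balance = [n // n_gpus + (1 if i < n % n_gpus else 0) for i in range(n_gpus)]
    devices = [torch.device(f'cuda:{i}') for i in range(n_gpus)]
    # Pipe places each partition on its device and splits the batch into micro-batches.
    return Pipe(nn.Sequential(*layers), balance=balance, devices=devices, chunks=chunks)

Each GPU then holds only its share of the layers, so the full model does not have to fit on a single device.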
13 changes: 10 additions & 3 deletions (fourth changed file)
@@ -7,7 +7,11 @@
# Generate Text with GPT-NeoX by Evaluating Layer by Layer
-This shows how to generate text from GPT-NeoX with a single GPU.
+This shows how to generate text from GPT-NeoX with a small GPU.
+It will first load all layers into memory and then move them layer by layer to the GPU for inference.
+This requires enough memory on the computer to load the entire model.
+Even a small GPU is enough, since we load only a single layer at a time onto the GPU.
"""

# Imports
@@ -17,7 +21,7 @@
from torch import nn

from labml import monit
-from neox.utils import load_layers, get_tokens, print_tokens
+from neox.utils import get_tokens, print_tokens, LayerGenerator
from neox.utils.cache import get_cache

# List of layers to load. This is used for testing.
@@ -70,7 +74,10 @@ def generate():
cache.set('use_cache', True)

# Load layers
-layers = load_layers(LAYERS)
+layers = list(LayerGenerator(is_clone_layers=True,
+                             filter_layers=LAYERS,
+                             dtype=torch.float16,
+                             ).load())

# Device
device = torch.device('cuda:0')
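This sample keeps the full model in CPU memory and only moves one layer at a time onto the GPU. A minimal sketch of that evaluation strategy (illustrative only; the sample's actual implementation is not fully shown in this diff):

import torch

def infer_layer_by_layer(layers, x: torch.Tensor, device: torch.device):
    # Keep the layers on the CPU; move each one to the GPU only while it runs.
    with torch.no_grad():
        x = x.to(device)
        for layer in layers:
            layer.to(device)   # load this layer onto the small GPU
            x = layer(x)       # run it
            layer.to('cpu')    # move it back to free GPU memory for the next layer
    return x

The trade-off is speed: every forward pass pays the cost of copying each layer's weights to the GPU, but peak GPU memory stays at roughly the size of a single layer plus activations.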
