Skip to content

Commit

Permalink
Merge pull request EleutherAI#806 from EleutherAI/tiktoken_import_fix
Browse files: browse the repository at this point in the history
Only import tiktoken if we intend to use it
  • Loading branch information
StellaAthena committed Mar 3, 2023
2 parents 60cac3a + e8a4684 commit 192022f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion configs/neox_arguments.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Logging Arguments

- **git_hash**: str

Default = a7febb2
Default = 496f987

current git hash of repository

Expand Down
6 changes: 5 additions & 1 deletion megatron/tokenizer/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from transformers import GPT2Tokenizer, GPT2TokenizerFast
import numpy as np
import sentencepiece as spm
import tiktoken
from typing import List, Union
from .gpt2_tokenization import GPT2Tokenizer

Expand Down Expand Up @@ -353,6 +352,11 @@ def eod(self):

class TiktokenTokenizer(AbstractTokenizer):
"""Tokenizer from OpenAI's tiktoken implementation"""
try:
import tiktoken
except ModuleNotFoundError:
print("Please install tiktoken: (https://github.com/openai/tiktoken)")
raise Exception

def __init__(self, vocab_file):
name = "TiktokenTokenizer"
Expand Down

0 comments on commit 192022f

Please sign in to comment.