Skip to content

Commit

Permalink
Only import tiktoken if we intend to use it
Browse files: browse the repository at this point in the history
  • Loading branch information
Quentin-Anthony committed Mar 3, 2023
1 parent 72c8071 commit 496f987
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion megatron/tokenizer/tokenizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,7 +24,6 @@
from transformers import GPT2Tokenizer, GPT2TokenizerFast
import numpy as np
import sentencepiece as spm
import tiktoken
from typing import List, Union
from .gpt2_tokenization import GPT2Tokenizer

Expand Down Expand Up @@ -353,6 +352,11 @@ def eod(self):

class TiktokenTokenizer(AbstractTokenizer):
"""Tokenizer from OpenAI's tiktoken implementation"""
try:
import tiktoken
except ModuleNotFoundError:
print("Please install tiktoken: (https://github.com/openai/tiktoken)")
raise Exception

def __init__(self, vocab_file):
name = "TiktokenTokenizer"
Expand Down

0 comments on commit 496f987

Please sign in to comment.