Skip to content

Commit

Permalink
Prefer transformer over sentencepiece tokenizer
Browse files: browse the repository at this point in the history
  • Loading branch information
armbues committed Jul 2, 2024
1 parent a34fbca commit f3b7530
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions sillm/core/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,12 @@ def load_model_dir(model_path: str) -> LLM:
     # Load tokenizer
     tokenizer = None
     tokenizer_path = None
-    if (model_path / "tokenizer.model").exists():
-        tokenizer_path = model_path / "tokenizer.model"
-        tokenizer = SentencePieceTokenizer(str(tokenizer_path), model_args)
-    elif (model_path / "tokenizer.json").exists():
+    if (model_path / "tokenizer.json").exists():
         tokenizer_path = model_path / "tokenizer.json"
         tokenizer = TransformerTokenizer(str(model_path), model_args)
+    elif (model_path / "tokenizer.model").exists():
+        tokenizer_path = model_path / "tokenizer.model"
+        tokenizer = SentencePieceTokenizer(str(tokenizer_path), model_args)
     elif model_args.model_type == "dbrx":
         tokenizer_path = "tiktoken"
         tokenizer = TiktokenTokenizer(model_args)
Expand Down

0 comments on commit f3b7530

Please sign in to comment.