Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenAI code interpreter (draft) #37

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update token counting
  • Loading branch information
kharvd committed Jun 24, 2023
commit 8297c3e8834e076f53ad58a799abab8d17552c3f
50 changes: 42 additions & 8 deletions gptcli/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,52 @@ def complete(
yield next_choice


def num_tokens_from_messages_openai(messages, model="gpt-3.5-turbo-0613"):
    """Return the number of tokens used by a list of messages.

    Follows OpenAI's reference token-counting recipe: each message carries a
    fixed per-message overhead plus the encoded length of every field, and the
    reply is primed with a fixed number of tokens.

    Args:
        messages: list of message dicts (keys such as "role", "content",
            "name", and optionally "function_call" with "name"/"arguments").
        model: OpenAI model name used to select the tokenizer and the
            per-message/per-name overhead constants.

    Returns:
        Estimated total prompt token count as an int.

    Raises:
        NotImplementedError: for models whose overhead constants are unknown.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = (
            4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        )
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        # Unpinned alias: assume the latest snapshot's accounting.
        return num_tokens_from_messages_openai(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        return num_tokens_from_messages_openai(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            logging.debug(f"key: {key}, value: {value}")
            if key == "function_call":
                # function_call is a dict, not a string; approximate its token
                # cost by encoding a "name(arguments)" rendering of it.
                # TODO: is this correct?
                value = f"{value['name']}({value['arguments']})"
            if key == "content":
                # content is None for assistant messages that only carry a
                # function call; nothing to encode in that case.
                if value is None:
                    continue
            # BUGFIX: the type check must happen *after* the function_call
            # dict is rendered to a string and after the None-content skip,
            # otherwise it fails on exactly those messages.
            assert isinstance(value, str)
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens


Expand Down