Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenAI code interpreter (draft) #37

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update token counting
  • Loading branch information
kharvd committed Jun 24, 2023
commit 8297c3e8834e076f53ad58a799abab8d17552c3f
50 changes: 42 additions & 8 deletions gptcli/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,52 @@ def complete(
yield next_choice


def num_tokens_from_messages_openai(messages, model="gpt-3.5-turbo-0613"):
    """Return the number of tokens used by a list of messages.

    Follows OpenAI's reference token-counting recipe: each message carries a
    fixed per-message overhead plus the encoded length of every field, and the
    reply is primed with a fixed number of tokens.

    Args:
        messages: list of message dicts (keys such as "role", "content",
            "name", and optionally "function_call" with "name"/"arguments").
        model: OpenAI model name used to select the tokenizer and the
            per-message/per-name overhead constants.

    Returns:
        Estimated total prompt token count as an int.

    Raises:
        NotImplementedError: for models whose overhead constants are unknown.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = (
            4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        )
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        # Unpinned alias: assume the latest snapshot's accounting.
        return num_tokens_from_messages_openai(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        return num_tokens_from_messages_openai(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            logging.debug(f"key: {key}, value: {value}")
            if key == "function_call":
                # function_call is a dict, not a string; approximate its token
                # cost by encoding a "name(arguments)" rendering of it.
                # TODO: is this correct?
                value = f"{value['name']}({value['arguments']})"
            if key == "content":
                # content is None for assistant messages that only carry a
                # function call; nothing to encode in that case.
                if value is None:
                    continue
            # BUGFIX: the type check must happen *after* the function_call
            # dict is rendered to a string and after the None-content skip,
            # otherwise it fails on exactly those messages.
            assert isinstance(value, str)
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens


Expand Down