Skip to content

Commit

Permalink
Merge branch 'ollama/response-metrics' into v2.4
Browse files Browse the repository at this point in the history
  • Loading branch information
ashpreetbedi committed May 5, 2024
2 parents 3126f8d + 4a3dc10 commit cf10972
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
4 changes: 4 additions & 0 deletions cookbook/llms/ollama/assistant.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from rich.pretty import pprint
from phi.assistant import Assistant
from phi.llm.ollama import Ollama

# Cookbook example: ask an Ollama-backed assistant one question, then dump
# whatever metrics the LLM object accumulated while answering.

# Build the LLM wrapper first so the Assistant construction reads as plain wiring.
ollama_llm = Ollama(model="llama3")

assistant = Assistant(
    llm=ollama_llm,
    description="You help people with their health and fitness goals.",
    debug_mode=True,  # NOTE(review): presumably enables verbose logging; confirm in phi docs
)

# Send a single prompt and print the reply (markdown=True is forwarded as-is).
assistant.print_response("Share a quick healthy breakfast recipe.", markdown=True)

# Pretty-print the `metrics` attribute of the assistant's LLM after the run.
print("\n-*- Metrics:")
pprint(assistant.llm.metrics)
11 changes: 11 additions & 0 deletions phi/llm/ollama/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,14 @@ def response_stream(self, messages: List[Message]) -> Iterator[str]:
tool_call_bracket_count = 0
is_last_tool_call_bracket = False
completion_tokens = 0
time_to_first_token = None
response_timer = Timer()
response_timer.start()
for response in self.invoke_stream(messages=messages):
completion_tokens += 1
if completion_tokens == 1:
time_to_first_token = response_timer.elapsed
logger.debug(f"Time to first token: {time_to_first_token:.4f}s")

# -*- Parse response
# logger.info(f"Ollama partial response: {response}")
Expand Down Expand Up @@ -307,9 +311,16 @@ def response_stream(self, messages: List[Message]) -> Iterator[str]:
# -*- Update usage metrics
# Add response time to metrics
assistant_message.metrics["time"] = response_timer.elapsed
assistant_message.metrics["time_to_first_token"] = time_to_first_token
if "response_times" not in self.metrics:
self.metrics["response_times"] = []
self.metrics["response_times"].append(response_timer.elapsed)
if "time_to_first_token" not in self.metrics:
self.metrics["time_to_first_token"] = []
self.metrics["time_to_first_token"].append(time_to_first_token)
if "tokens_per_second" not in self.metrics:
self.metrics["tokens_per_second"] = []
self.metrics["tokens_per_second"].append(completion_tokens / response_timer.elapsed)

# -*- Add assistant message to messages
messages.append(assistant_message)
Expand Down

0 comments on commit cf10972

Please sign in to comment.