Skip to content

Commit

Permalink
v2.4
Browse files (browse the repository at this point in the history)
  • Loading branch information
ashpreetbedi committed May 5, 2024
1 parent 8d00d4d commit 69ce824
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 8 deletions.
2 changes: 1 addition & 1 deletion cookbook/llms/ollama/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
)
assistant.print_response("Share a quick healthy breakfast recipe.", markdown=True)
print("\n-*- Metrics:")
pprint(assistant.llm.metrics)
pprint(assistant.llm.metrics) # type: ignore
4 changes: 4 additions & 0 deletions phi/assistant/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -1501,6 +1501,7 @@ async def async_print_response(

def cli_app(
self,
message: Optional[str] = None,
user: str = "User",
emoji: str = ":sunglasses:",
stream: bool = True,
Expand All @@ -1509,6 +1510,9 @@ def cli_app(
) -> None:
from rich.prompt import Prompt

if message:
self.print_response(message=message, stream=stream, markdown=markdown)

_exit_on = exit_on or ["exit", "quit", "bye"]
while True:
message = Prompt.ask(f"[bold] {emoji} {user} [/bold]")
Expand Down
15 changes: 8 additions & 7 deletions phi/llm/ollama/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,9 @@ def response_stream(self, messages: List[Message]) -> Iterator[str]:
yield response_content

response_timer.stop()
logger.debug(f"Number of tokens generated: {completion_tokens}")
logger.debug(f"Time per output token: {response_timer.elapsed/completion_tokens:.4f}s")
logger.debug(f"Throughtput: {completion_tokens/response_timer.elapsed:.4f} tokens/s")
logger.debug(f"Tokens generated: {completion_tokens}")
logger.debug(f"Time per output token: {response_timer.elapsed / completion_tokens:.4f}s")
logger.debug(f"Throughput: {completion_tokens / response_timer.elapsed:.4f} tokens/s")
logger.debug(f"Time to generate response: {response_timer.elapsed:.4f}s")

# -*- Create assistant message
Expand Down Expand Up @@ -313,17 +313,18 @@ def response_stream(self, messages: List[Message]) -> Iterator[str]:

# -*- Update usage metrics
# Add response time to metrics
assistant_message.metrics["time"] = response_timer.elapsed
assistant_message.metrics["time_to_first_token"] = time_to_first_token
assistant_message.metrics["time"] = f"{response_timer.elapsed:.4f}"
assistant_message.metrics["time_to_first_token"] = f"{time_to_first_token:.4f}s"
assistant_message.metrics["time_per_output_token"] = f"{response_timer.elapsed / completion_tokens:.4f}s"
if "response_times" not in self.metrics:
self.metrics["response_times"] = []
self.metrics["response_times"].append(response_timer.elapsed)
if "time_to_first_token" not in self.metrics:
self.metrics["time_to_first_token"] = []
self.metrics["time_to_first_token"].append(time_to_first_token)
self.metrics["time_to_first_token"].append(f"{time_to_first_token:.4f}s")
if "tokens_per_second" not in self.metrics:
self.metrics["tokens_per_second"] = []
self.metrics["tokens_per_second"].append(completion_tokens / response_timer.elapsed)
self.metrics["tokens_per_second"].append(f"{completion_tokens / response_timer.elapsed:.4f}")

# -*- Add assistant message to messages
messages.append(assistant_message)
Expand Down

0 comments on commit 69ce824

Please sign in to comment.