Optimize LLM usage (#40)

* Do not ping the LLM when we do not need to. Previously we always pinged the LLM, even when response suggestions were turned off. Partial refactoring work towards separating the conversation into its own object. * Make the OpenAI model configurable via parameters.yaml so it is easy for the end user to change.
SevaSk · vivekuppal · Jun 29, 2023 · Jun 29, 2023 · Jun 29, 2023 · Jun 29, 2023
commit 26cfaad40581332247375f00ebc9e792f0194db4
diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,5 @@ __pycache__/
 *.wav
 .venv/
 venv
-output
+output
+tiny.pt
diff --git a/AudioTranscriber.py b/AudioTranscriber.py
@@ -1,19 +1,20 @@
 import os
+import queue
 import threading
 import io
 from datetime import timedelta
 import wave
 import tempfile
-import whisper
 import custom_speech_recognition as sr
 import pyaudiowpatch as pyaudio
 from heapq import merge
+import conversation
 
 PHRASE_TIMEOUT = 3.05
 
 
 class AudioTranscriber:
- def __init__(self, mic_source, speaker_source, model):
+ def __init__(self, mic_source, speaker_source, model, convo: conversation.Conversation):
  self.transcript_data = {"You": [], "Speaker": []}
  self.transcript_changed_event = threading.Event()
  self.audio_model = model
@@ -38,8 +39,9 @@ def __init__(self, mic_source, speaker_source, model):
  "process_data_func": self.process_speaker_data
  }
  }
+ self.conversation = convo
 
- def transcribe_audio_queue(self, audio_queue):
+ def transcribe_audio_queue(self, audio_queue: queue.Queue):
  """Transcribe data from audio sources. In this case we have 2 sources, microphone, speaker.
  Args:
  audio_queue: queue object with reference to audio files
@@ -109,9 +111,15 @@ def update_transcript(self, who_spoke, text, time_spoken):
 
  if source_info["new_phrase"] or len(transcript) == 0:
  transcript.append((f"{who_spoke}: [{text}]\n\n", time_spoken))
+ self.conversation.update_conversation(persona=who_spoke,
+ time_spoken=time_spoken,
+ text=text)
  else:
  transcript.pop()
  transcript.append((f"{who_spoke}: [{text}]\n\n", time_spoken))
+ self.conversation.update_conversation(persona=who_spoke,
+ time_spoken=time_spoken,
+ text=text, pop=True)
 
  def get_transcript(self, length: int = 0):
  """Get the audio transcript

diff --git a/GPTResponder.py b/GPTResponder.py
@@ -2,23 +2,29 @@
 import GlobalVars
 from prompts import create_prompt, INITIAL_RESPONSE
 import time
-
+import conversation
+import constants
+import configuration
 
 # Number of phrases to use for generating a response
-MAX_PHRASES = 10
+MAX_PHRASES = 20
 
 
 class GPTResponder:
- def __init__(self):
+ def __init__(self, convo: conversation.Conversation):
  self.response = INITIAL_RESPONSE
  self.response_interval = 2
- openai.api_key = GlobalVars.TranscriptionGlobals().api_key
+ self.gl_vars = GlobalVars.TranscriptionGlobals()
+ openai.api_key = self.gl_vars.api_key
+ self.conversation = convo
+ self.config = configuration.Config().get_data()
+ self.model = self.config['OpenAI']['ai_model']
 
- def generate_response_from_transcript(self, transcript):
+ def generate_response_from_transcript_no_check(self, transcript):
  try:
  prompt_content = create_prompt(transcript)
  response = openai.ChatCompletion.create(
- model="gpt-3.5-turbo-0301",
+ model=self.model,
  messages=[{"role": "system", "content": prompt_content}],
  temperature=0.0
  )
@@ -31,8 +37,18 @@ def generate_response_from_transcript(self, transcript):
  except:
  return ''
 
+ def generate_response_from_transcript(self, transcript):
+     """Ping OpenAI LLM model to get response from the Assistant.
+     Returns '' without calling the LLM while freeze_state[0] is set."""
+
+     if self.gl_vars.freeze_state[0]:
+         return ''
+     # Must be reached through self: a bare method name is not in scope here.
+     return self.generate_response_from_transcript_no_check(transcript)
+
  def respond_to_transcriber(self, transcriber):
  while True:
+
  if transcriber.transcript_changed_event.is_set():
  start_time = time.time()
 
@@ -45,6 +61,9 @@ def respond_to_transcriber(self, transcriber):
 
  if response != '':
  self.response = response
+ self.conversation.update_conversation(persona=constants.PERSONA_ASSISTANT,
+ text=response,
+ time_spoken=end_time)
 
  remaining_time = self.response_interval - execution_time
  if remaining_time > 0:

diff --git a/constants.py b/constants.py
@@ -0,0 +1,7 @@
+"""Globally used constants
+"""
+
+PERSONA_YOU = "You"  # same string AudioTranscriber uses as its "You" transcript key
+PERSONA_ASSISTANT = "Assistant"  # persona GPTResponder records LLM responses under
+PERSONA_SYSTEM = "System"  # presumably instructions given to the LLM -- TODO confirm
+PERSONA_SPEAKER = "Speaker"  # same string AudioTranscriber uses as its "Speaker" transcript key
diff --git a/conversation.py b/conversation.py
@@ -0,0 +1,60 @@
+from heapq import merge
+import constants
+import configuration
+
+
+class Conversation:
+    """Encapsulates the complete conversation.
+    Has text from Speakers, Microphone, LLM, Instructions to LLM
+    """
+
+    def __init__(self):
+        self.transcript_data = {constants.PERSONA_SYSTEM: [],
+                                constants.PERSONA_YOU: [],
+                                constants.PERSONA_SPEAKER: [],
+                                constants.PERSONA_ASSISTANT: []}
+        # NOTE(review): the loaded config is not used anywhere in this class yet.
+        config = configuration.Config().get_data()
+
+    def clear_conversation_data(self):
+        """Clear all conversation data
+        """
+        for entries in self.transcript_data.values():
+            entries.clear()
+
+    def update_conversation(self, persona: str, text: str, time_spoken, pop: bool = False):
+        """Update conversation with new data
+        Args:
+        persona: persona this part of conversation is attributed to
+        text: Actual words
+        time_spoken: Time at which conversation happened, this is typically reported in local time
+        pop: Replace the latest entry of this persona instead of adding a new one
+        """
+        transcript = self.transcript_data[persona]
+        # Nothing to replace when this persona has no entries yet.
+        if pop and transcript:
+            transcript.pop()
+        transcript.append((f"{persona}: [{text}]\n\n", time_spoken))
+
+    def get_conversation(self,
+                         sources: list = None,
+                         length: int = 0):
+        """Get the complete transcript
+        Args:
+        sources: Get data from which sources (You, Speaker, Assistant, System)
+        length: Get the last length elements from the audio transcript.
+        Default value = 0, gives the complete transcript
+        """
+        if sources is None:
+            sources = [constants.PERSONA_YOU,
+                       constants.PERSONA_SPEAKER,
+                       constants.PERSONA_ASSISTANT,
+                       constants.PERSONA_SYSTEM]
+
+        # Merge only the requested personas, ordered by time_spoken; entries of
+        # different personas must therefore carry mutually comparable times.
+        combined_transcript = list(merge(
+            *(self.transcript_data[source][-length:] for source in sources),
+            key=lambda x: x[1]))
+        combined_transcript = combined_transcript[-length:]
+        return "".join([t[0] for t in combined_transcript])
diff --git a/main.py b/main.py
@@ -14,7 +14,7 @@
 from language import LANGUAGES_DICT
 import GlobalVars
 import configuration
-
+import conversation
 
 def main():
  # Set up all arguments
@@ -95,16 +95,20 @@ def main():
  time.sleep(2)
 
  global_vars.speaker_audio_recorder.record_into_queue(global_vars.audio_queue)
+ global_vars.freeze_state = [True]
+ convo = conversation.Conversation()
 
  # Transcribe and Respond threads, both work on the same instance of the AudioTranscriber class
  global_vars.transcriber = AudioTranscriber(global_vars.user_audio_recorder.source,
- global_vars.speaker_audio_recorder.source, model)
+ global_vars.speaker_audio_recorder.source,
+ model,
+ convo=convo)
  transcribe_thread = threading.Thread(target=global_vars.transcriber.transcribe_audio_queue,
  args=(global_vars.audio_queue,))
  transcribe_thread.daemon = True
  transcribe_thread.start()
 
- global_vars.responder = GPTResponder()
+ global_vars.responder = GPTResponder(convo=convo)
 
  respond_thread = threading.Thread(target=global_vars.responder.respond_to_transcriber,
  args=(global_vars.transcriber,))
@@ -120,8 +124,6 @@ def main():
  root.grid_columnconfigure(0, weight=2)
  root.grid_columnconfigure(1, weight=1)
 
- global_vars.freeze_state = [True]
-
  ui_cb = ui.ui_callbacks()
  global_vars.freeze_button.configure(command=ui_cb.freeze_unfreeze)
  label_text = f'Update Response interval: {update_interval_slider.get()} seconds'

diff --git a/parameters.yaml b/parameters.yaml
@@ -1,2 +1,11 @@
 OpenAI:
  api_key: 'API_KEY'
+
+# Possible model values
+# gpt-3.5-turbo, gpt-3.5-turbo-16k, gpt-3.5-turbo-0613, gpt-3.5-turbo-16k-0613
+# gpt-4, gpt-4-0613, gpt-4-32k, gpt-4-32k-0613
+# Legacy models
+# text-davinci-003, text-davinci-002, code-davinci-002
+# See this link for available models
+# https://platform.openai.com/docs/models/continuous-model-upgrades
+ ai_model: gpt-3.5-turbo-0301