UI Text Chronology (#5)

* Show all text in chronological order instead of the current behavior of showing only the last MAX_PHRASES (10) items. Use the last MAX_PHRASES (10) items for generating a response to the conversation. * Remove print statements and debugging code. * Remove references to Git LFS. Add a note about models.
SevaSk · vivekuppal · Jun 29, 2023 · Jun 29, 2023 · Jun 29, 2023 · Jun 29, 2023
commit 59d5c91e6e71538f2cb16c4d404e04884b28c0ab
diff --git a/AudioTranscriber.py b/AudioTranscriber.py
@@ -1,18 +1,17 @@
-import whisper
-import torch
-import wave
 import os
 import threading
-import tempfile
-import custom_speech_recognition as sr
 import io
 from datetime import timedelta
+import wave
+import tempfile
+import whisper
+import torch
+import custom_speech_recognition as sr
 import pyaudiowpatch as pyaudio
 from heapq import merge
 
 PHRASE_TIMEOUT = 3.05
 
-MAX_PHRASES = 10
 
 class AudioTranscriber:
  def __init__(self, mic_source, speaker_source, model):
@@ -52,8 +51,8 @@ def transcribe_audio_queue(self, audio_queue):
  os.close(fd)
  source_info["process_data_func"](source_info["last_sample"], path)
  text = self.audio_model.get_transcription(path)
- except Exception as e:
- print(e)
+ except Exception as exception:
+ print(exception)
  finally:
  os.unlink(path)
 
@@ -91,19 +90,18 @@ def update_transcript(self, who_spoke, text, time_spoken):
  transcript = self.transcript_data[who_spoke]
 
  if source_info["new_phrase"] or len(transcript) == 0:
- if len(transcript) > MAX_PHRASES:
- transcript.pop(-1)
- transcript.insert(0, (f"{who_spoke}: [{text}]\n\n", time_spoken))
+ transcript.append((f"{who_spoke}: [{text}]\n\n", time_spoken))
  else:
- transcript[0] = (f"{who_spoke}: [{text}]\n\n", time_spoken)
+ transcript.pop()
+ transcript.append((f"{who_spoke}: [{text}]\n\n", time_spoken))
 
- def get_transcript(self):
+ def get_transcript(self, length: int = 0):
  combined_transcript = list(merge(
- self.transcript_data["You"], self.transcript_data["Speaker"], 
- key=lambda x: x[1], reverse=True))
- combined_transcript = combined_transcript[:MAX_PHRASES]
+ self.transcript_data["You"], self.transcript_data["Speaker"],
+ key=lambda x: x[1], reverse=False))
+ combined_transcript = combined_transcript[-length:]
  return "".join([t[0] for t in combined_transcript])
- 
+
  def clear_transcript_data(self):
  self.transcript_data["You"].clear()
  self.transcript_data["Speaker"].clear()
@@ -112,4 +110,4 @@ def clear_transcript_data(self):
  self.audio_sources["Speaker"]["last_sample"] = bytes()
 
  self.audio_sources["You"]["new_phrase"] = True
- self.audio_sources["Speaker"]["new_phrase"] = True
+ self.audio_sources["Speaker"]["new_phrase"] = True
diff --git a/GPTResponder.py b/GPTResponder.py
@@ -4,23 +4,26 @@
 import time
 
 openai.api_key = OPENAI_API_KEY
+# Number of phrases to use for generating a response
+MAX_PHRASES = 10
 
 def generate_response_from_transcript(transcript):
  try:
  response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo-0301",
  messages=[{"role": "system", "content": create_prompt(transcript)}],
- temperature = 0.0
+ temperature=0.0
  )
- except Exception as e:
- print(e)
+ except Exception as exception:
+ print(exception)
  return ''
  full_response = response.choices[0].message.content
  try:
  return full_response.split('[')[1].split(']')[0]
  except:
  return ''
-
+
+
 class GPTResponder:
  def __init__(self):
  self.response = INITIAL_RESPONSE
@@ -32,7 +35,7 @@ def respond_to_transcriber(self, transcriber):
  start_time = time.time()
 
  transcriber.transcript_changed_event.clear()
- transcript_string = transcriber.get_transcript()
+ transcript_string = transcriber.get_transcript(length=MAX_PHRASES)
  response = generate_response_from_transcript(transcript_string)
 
  end_time = time.time() # Measure end time
@@ -48,4 +51,4 @@ def respond_to_transcriber(self, transcriber):
  time.sleep(0.3)
 
  def update_response_interval(self, interval):
- self.response_interval = interval
+ self.response_interval = interval
diff --git a/README.md b/README.md
@@ -12,15 +12,6 @@ Follow these steps to set up and run transcribe on your local machine.
 - Python >=3.8.0
 - (Optional) An OpenAI API key that can access the OpenAI API (set up a paid OpenAI account)
 - Windows OS (Not tested on others)
-- Git LFS
-
- Install [Git LFS](https://git-lfs.com/)
-
- Run the command
- ```
- git lfs install
- ```
-
 - FFmpeg 
 
 If FFmpeg is not installed in your system, follow the steps below to install it.
@@ -101,6 +92,8 @@ While Transcribe provides real-time transcription and optional response suggesti
 Incorrect API key provided: API_KEY. You can find your API key at https://platform.openai.com/account/api-keys.
 ```
 
+**Models**: The default install of Transcribe includes the tiny (72 MB) model. The base (138 MB) and small (461 MB) models can be downloaded and used for transcription by following the instructions in the help text of the `--model` command-line option. The larger models provide better transcription quality but have higher memory requirements.
+
 **Language**: If you are not using the --api flag, the Whisper model used in Transcribe is set to English. As a result, it may not accurately transcribe non-English languages or dialects. 
 
 ## 📖 License

diff --git a/keys.py b/keys.py
@@ -1 +1 @@
-OPENAI_API_KEY="API_KEY"
+OPENAI_API_KEY = "API_KEY"
diff --git a/main.py b/main.py
@@ -20,6 +20,7 @@ def write_in_textbox(textbox, text):
 def update_transcript_UI(transcriber, textbox):
  transcript_string = transcriber.get_transcript()
  write_in_textbox(textbox, transcript_string)
+ textbox.see("end")
  textbox.after(300, update_transcript_UI, transcriber, textbox)
 
 
@@ -89,9 +90,12 @@ def main():
  help='Use the online Open AI API for transcription.\
  \nThis option requires an API KEY and will consume Open AI credits.')
  cmd_args.add_argument('-m', '--model', action='store', choices=['tiny', 'base', 'small'], default='tiny',
- help='Specify the model to use for transcription.'\
- '\nOpenAI has more models besides the ones specified above.'\
- '\nThose models are prohibitive to use on local machines because of memory requirements.'\
+ help='Specify the model to use for transcription.'
+ '\nBy default tiny model is part of the install.'
+ '\nbase model has to be downloaded from the link https://drive.google.com/file/d/1E44DVjpfZX8tSrSagaDJXU91caZOkwa6/view?usp=drive_link'
+ '\nsmall model has to be downloaded from the link https://drive.google.com/file/d/1E44DVjpfZX8tSrSagaDJXU91caZOkwa6/view?usp=drive_link'
+ '\nOpenAI has more models besides the ones specified above.'
+ '\nThose models are prohibitive to use on local machines because of memory requirements.'
  '\nThis option is only applicable when not using the --api option.')
  args = cmd_args.parse_args()
 
@@ -118,6 +122,7 @@ def main():
  speaker_audio_recorder.record_into_queue(audio_queue)
  model = TranscriberModels.get_model(args.api, model=args.model)
 
+ # Transcribe and Respond threads, both work on the same instance of the AudioTranscriber class
  transcriber = AudioTranscriber(user_audio_recorder.source,
  speaker_audio_recorder.source, model)
  transcribe = threading.Thread(target=transcriber.transcribe_audio_queue,