From 342a58ea4ea96a242f6c53c1f8da09483203b532 Mon Sep 17 00:00:00 2001
From: vivek <vivek.uppal@gmail.com>
Date: Sat, 19 Aug 2023 16:27:16 -0400
Subject: [PATCH 1/2] Allow user to select non default speaker, microphone
 using command line arguments.

---
 AudioRecorder.py | 85 ++++++++++++++++++++++++++++++++++++++++--------
 GlobalVars.py    |  8 ++---
 main.py          | 27 +++++++++++----
 ui.py            |  2 +-
 4 files changed, 97 insertions(+), 25 deletions(-)

diff --git a/AudioRecorder.py b/AudioRecorder.py
index da801f4..161a163 100644
--- a/AudioRecorder.py
+++ b/AudioRecorder.py
@@ -1,8 +1,8 @@
+from datetime import datetime
+from abc import abstractmethod
 import custom_speech_recognition as sr
 import pyaudiowpatch as pyaudio
-from datetime import datetime
 import app_logging as al
-from abc import abstractmethod
 
 RECORD_TIMEOUT = 3
 ENERGY_THRESHOLD = 1000
@@ -83,6 +83,8 @@ def print_detailed_audio_info(print_func=print):
 
 
 class BaseRecorder:
+    """Base class for Speaker, Microphone classes
+    """
     def __init__(self, source, source_name):
         root_logger.info(BaseRecorder.__name__)
         self.recorder = sr.Recognizer()
@@ -116,13 +118,16 @@ def record_callback(_, audio: sr.AudioData) -> None:
                                            phrase_time_limit=RECORD_TIMEOUT)
 
 
-class DefaultMicRecorder(BaseRecorder):
+class MicRecorder(BaseRecorder):
+    """Encapsulates the Microphone device audio input
+    """
     def __init__(self):
-        root_logger.info(DefaultMicRecorder.__name__)
-        with pyaudio.PyAudio() as p:
+        root_logger.info(MicRecorder.__name__)
+        with pyaudio.PyAudio() as py_audio:
             # WASAPI is windows specific
-            wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
-            default_mic = p.get_device_info_by_index(wasapi_info["defaultInputDevice"])
+            wasapi_info = py_audio.get_host_api_info_by_type(pyaudio.paWASAPI)
+            self.device_index = wasapi_info["defaultInputDevice"]
+            default_mic = py_audio.get_device_info_by_index(self.device_index)
 
         self.device_info = default_mic
 
@@ -130,20 +135,46 @@ def __init__(self):
                                sample_rate=int(default_mic["defaultSampleRate"]),
                                channels=default_mic["maxInputChannels"]
                                )
+        self.source = source
         super().__init__(source=source, source_name="You")
         print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ')
+        # This line is commented because in case of non defaul microphone it can occasionally take
+        # several minutes to execute, thus delaying the start of the application.
         # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
 
     def get_name(self):
-        return self.device_info['name']
+        return f'#{self.device_index} - {self.device_info["name"]}'
+
+    def set_device(self, index: int):
+        """Set active device based on index.
+        """
+        root_logger.info(MicRecorder.set_device.__name__)
+        with pyaudio.PyAudio() as py_audio:
+            self.device_index = index
+            mic = py_audio.get_device_info_by_index(self.device_index)
+
+        self.device_info = mic
+
+        source = sr.Microphone(device_index=mic["index"],
+                               sample_rate=int(mic["defaultSampleRate"]),
+                               channels=mic["maxInputChannels"]
+                               )
+        self.source = source
+        print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ')
+        # This line is commented because in case of non defaul microphone it can occasionally take
+        # several minutes to execute, thus delaying the start of the application.
+        # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
 
 
-class DefaultSpeakerRecorder(BaseRecorder):
+class SpeakerRecorder(BaseRecorder):
+    """Encapsulates the Speaker device audio input
+    """
     def __init__(self):
-        root_logger.info(DefaultSpeakerRecorder.__name__)
+        root_logger.info(SpeakerRecorder.__name__)
         with pyaudio.PyAudio() as p:
             wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
-            default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
+            self.device_index = wasapi_info["defaultOutputDevice"]
+            default_speakers = p.get_device_info_by_index(self.device_index)
 
             if not default_speakers["isLoopbackDevice"]:
                 for loopback in p.get_loopback_device_info_generator():
@@ -163,10 +194,38 @@ def __init__(self):
         super().__init__(source=source, source_name="Speaker")
         print(f'[INFO] Listening to sound from Speaker: {self.get_name()} ')
         self.adjust_for_noise("Default Speaker",
-                              "Please make or play some noise from the Default Speaker...")
+                              "Please play sound from Default Speaker...")
 
     def get_name(self):
-        return self.device_info['name']
+        return f'#{self.device_index} - {self.device_info["name"]}'
+
+    def set_device(self, index: int):
+        """Set active device based on index.
+        """
+        root_logger.info(SpeakerRecorder.set_device.__name__)
+        with pyaudio.PyAudio() as p:
+            self.device_index = index
+            speakers = p.get_device_info_by_index(self.device_index)
+
+            if not speakers["isLoopbackDevice"]:
+                for loopback in p.get_loopback_device_info_generator():
+                    if speakers["name"] in loopback["name"]:
+                        speakers = loopback
+                        break
+                else:
+                    print("[ERROR] No loopback device found.")
+
+        self.device_info = speakers
+
+        source = sr.Microphone(speaker=True,
+                               device_index=speakers["index"],
+                               sample_rate=int(speakers["defaultSampleRate"]),
+                               chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
+                               channels=speakers["maxInputChannels"])
+        self.source = source
+        print(f'[INFO] Listening to sound from Speaker: {self.get_name()} ')
+        self.adjust_for_noise("Speaker",
+                              f"Please play sound from selected Speakers {self.get_name()}...")
 
 
 if __name__ == "__main__":
diff --git a/GlobalVars.py b/GlobalVars.py
index e068787..3caa4a5 100644
--- a/GlobalVars.py
+++ b/GlobalVars.py
@@ -15,8 +15,8 @@ class TranscriptionGlobals(Singleton.Singleton):
     """
 
     audio_queue: queue.Queue = None
-    user_audio_recorder: AudioRecorder.DefaultMicRecorder = None
-    speaker_audio_recorder: AudioRecorder.DefaultSpeakerRecorder = None
+    user_audio_recorder: AudioRecorder.MicRecorder = None
+    speaker_audio_recorder: AudioRecorder.SpeakerRecorder = None
     # Global for transcription from speaker, microphone
     transcriber: AudioTranscriber = None
     # Global for responses from openAI API
@@ -33,8 +33,8 @@ def __init__(self, key: str = 'API_KEY'):
         if self.audio_queue is None:
             self.audio_queue = queue.Queue()
         if self.user_audio_recorder is None:
-            self.user_audio_recorder = AudioRecorder.DefaultMicRecorder()
+            self.user_audio_recorder = AudioRecorder.MicRecorder()
         if self.speaker_audio_recorder is None:
-            self.speaker_audio_recorder = AudioRecorder.DefaultSpeakerRecorder()
+            self.speaker_audio_recorder = AudioRecorder.SpeakerRecorder()
         if self.api_key is None:
             self.api_key = key
diff --git a/main.py b/main.py
index e494e43..1b22eb2 100644
--- a/main.py
+++ b/main.py
@@ -19,6 +19,8 @@
 
 
 def main():
+    """Primary method to run transcribe
+    """
     # Set up all arguments
     cmd_args = argparse.ArgumentParser(description='Command Line Arguments for Transcribe',
                                        formatter_class=RawTextHelpFormatter)
@@ -33,7 +35,7 @@ def main():
     cmd_args.add_argument('-m', '--model', action='store', choices=[
         'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large'],
         default='tiny',
-        help='Specify the model to use for transcription.'
+        help='Specify the LLM to use for transcription.'
         '\nBy default tiny english model is part of the install.'
         '\ntiny multi-lingual model has to be downloaded from the link   '
         'https://drive.google.com/file/d/1M4AFutTmQROaE9xk2jPc5Y4oFRibHhEh/view?usp=drive_link'
@@ -59,6 +61,10 @@ def main():
     cmd_args.add_argument('-l', '--list_devices', action='store_true',
                           help='List all audio drivers and audio devices on this machine. \
                             \nUse this list index to select the microphone, speaker device for transcription.')
+    cmd_args.add_argument('-mi', '--mic_device_index', action='store', default=None, type=int,
+                          help='Device index of the microphone for capturing sound.')
+    cmd_args.add_argument('-si', '--speaker_device_index', action='store', default=None, type=int,
+                          help='Device index of the speaker for capturing sound.')
     args = cmd_args.parse_args()
 
     # Initiate config
@@ -78,6 +84,14 @@ def main():
     # Initiate logging
     log_listener = app_logging.initiate_log(config=config)
 
+    if args.mic_device_index is not None:
+        print('Override default microphone with device specified on command line.')
+        global_vars.user_audio_recorder.set_device(index=args.mic_device_index)
+
+    if args.speaker_device_index is not None:
+        print('Override default speaker with device specified on command line.')
+        global_vars.speaker_audio_recorder.set_device(index=args.speaker_device_index)
+
     try:
         subprocess.run(["ffmpeg", "-version"],
                        stdout=subprocess.DEVNULL,
@@ -109,16 +123,14 @@ def main():
     root = ctk.CTk()
     ui_components = ui.create_ui_components(root)
     transcript_textbox = ui_components[0]
-    response_textbox = ui_components[1]
+    global_vars.response_textbox = ui_components[1]
     update_interval_slider = ui_components[2]
     update_interval_slider_label = ui_components[3]
     global_vars.freeze_button = ui_components[4]
     lang_combobox = ui_components[5]
-    filemenu = ui_components[6]
+    global_vars.filemenu = ui_components[6]
     response_now_button = ui_components[7]
 
-    global_vars.filemenu = filemenu
-    global_vars.response_textbox = response_textbox
     global_vars.user_audio_recorder.record_into_queue(global_vars.audio_queue)
 
     time.sleep(2)
@@ -164,8 +176,9 @@ def main():
     lang_combobox.configure(command=model.change_lang)
 
     ui.update_transcript_ui(global_vars.transcriber, transcript_textbox)
-    ui.update_response_ui(global_vars.responder, response_textbox, update_interval_slider_label,
-                          update_interval_slider, global_vars.freeze_state)
+    ui.update_response_ui(global_vars.responder, global_vars.response_textbox,
+                          update_interval_slider_label, update_interval_slider,
+                          global_vars.freeze_state)
 
     root.mainloop()
     log_listener.stop()
diff --git a/ui.py b/ui.py
index e44064d..f230bde 100644
--- a/ui.py
+++ b/ui.py
@@ -191,7 +191,7 @@ def create_ui_components(root):
     freeze_button = ctk.CTkButton(root, text="Suggest Responses Continuously", command=None)
     freeze_button.grid(row=1, column=1, padx=10, pady=3, sticky="nsew")
 
-    response_now_button = ctk.CTkButton(root, text="Suggest Responses Now", command=None)
+    response_now_button = ctk.CTkButton(root, text="Suggest Response Now", command=None)
     response_now_button.grid(row=2, column=1, padx=10, pady=3, sticky="nsew")
 
     update_interval_slider_label = ctk.CTkLabel(root, text="", font=("Arial", 12),

From 4dc2886a28cb560d1c04331ea056e9fa697e850e Mon Sep 17 00:00:00 2001
From: vivek <vivek.uppal@gmail.com>
Date: Mon, 21 Aug 2023 09:25:31 -0400
Subject: [PATCH 2/2] typos

---
 AudioRecorder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/AudioRecorder.py b/AudioRecorder.py
index 161a163..cbbd351 100644
--- a/AudioRecorder.py
+++ b/AudioRecorder.py
@@ -138,7 +138,7 @@ def __init__(self):
         self.source = source
         super().__init__(source=source, source_name="You")
         print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ')
-        # This line is commented because in case of non defaul microphone it can occasionally take
+        # This line is commented because in case of non default microphone it can occasionally take
         # several minutes to execute, thus delaying the start of the application.
         # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
 
@@ -161,7 +161,7 @@ def set_device(self, index: int):
                                )
         self.source = source
         print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ')
-        # This line is commented because in case of non defaul microphone it can occasionally take
+        # This line is commented because in case of non default microphone it can occasionally take
         # several minutes to execute, thus delaying the start of the application.
         # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")