From 342a58ea4ea96a242f6c53c1f8da09483203b532 Mon Sep 17 00:00:00 2001 From: vivek Date: Sat, 19 Aug 2023 16:27:16 -0400 Subject: [PATCH 1/2] Allow user to select non default speaker, microphone using command line arguments. --- AudioRecorder.py | 85 ++++++++++++++++++++++++++++++++++++++++-------- GlobalVars.py | 8 ++--- main.py | 27 +++++++++++---- ui.py | 2 +- 4 files changed, 97 insertions(+), 25 deletions(-) diff --git a/AudioRecorder.py b/AudioRecorder.py index da801f4..161a163 100644 --- a/AudioRecorder.py +++ b/AudioRecorder.py @@ -1,8 +1,8 @@ +from datetime import datetime +from abc import abstractmethod import custom_speech_recognition as sr import pyaudiowpatch as pyaudio -from datetime import datetime import app_logging as al -from abc import abstractmethod RECORD_TIMEOUT = 3 ENERGY_THRESHOLD = 1000 @@ -83,6 +83,8 @@ def print_detailed_audio_info(print_func=print): class BaseRecorder: + """Base class for Speaker, Microphone classes + """ def __init__(self, source, source_name): root_logger.info(BaseRecorder.__name__) self.recorder = sr.Recognizer() @@ -116,13 +118,16 @@ def record_callback(_, audio: sr.AudioData) -> None: phrase_time_limit=RECORD_TIMEOUT) -class DefaultMicRecorder(BaseRecorder): +class MicRecorder(BaseRecorder): + """Encapsulates the Microphone device audio input + """ def __init__(self): - root_logger.info(DefaultMicRecorder.__name__) - with pyaudio.PyAudio() as p: + root_logger.info(MicRecorder.__name__) + with pyaudio.PyAudio() as py_audio: # WASAPI is windows specific - wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) - default_mic = p.get_device_info_by_index(wasapi_info["defaultInputDevice"]) + wasapi_info = py_audio.get_host_api_info_by_type(pyaudio.paWASAPI) + self.device_index = wasapi_info["defaultInputDevice"] + default_mic = py_audio.get_device_info_by_index(self.device_index) self.device_info = default_mic @@ -130,20 +135,46 @@ def __init__(self): sample_rate=int(default_mic["defaultSampleRate"]), 
channels=default_mic["maxInputChannels"] ) + self.source = source super().__init__(source=source, source_name="You") print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ') + # This line is commented because in case of non defaul microphone it can occasionally take + # several minutes to execute, thus delaying the start of the application. # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...") def get_name(self): - return self.device_info['name'] + return f'#{self.device_index} - {self.device_info["name"]}' + + def set_device(self, index: int): + """Set active device based on index. + """ + root_logger.info(MicRecorder.set_device.__name__) + with pyaudio.PyAudio() as py_audio: + self.device_index = index + mic = py_audio.get_device_info_by_index(self.device_index) + + self.device_info = mic + + source = sr.Microphone(device_index=mic["index"], + sample_rate=int(mic["defaultSampleRate"]), + channels=mic["maxInputChannels"] + ) + self.source = source + print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ') + # This line is commented because in case of non defaul microphone it can occasionally take + # several minutes to execute, thus delaying the start of the application. 
+ # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...") -class DefaultSpeakerRecorder(BaseRecorder): +class SpeakerRecorder(BaseRecorder): + """Encapsulates the Speaker device audio input + """ def __init__(self): - root_logger.info(DefaultSpeakerRecorder.__name__) + root_logger.info(SpeakerRecorder.__name__) with pyaudio.PyAudio() as p: wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI) - default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"]) + self.device_index = wasapi_info["defaultOutputDevice"] + default_speakers = p.get_device_info_by_index(self.device_index) if not default_speakers["isLoopbackDevice"]: for loopback in p.get_loopback_device_info_generator(): @@ -163,10 +194,38 @@ def __init__(self): super().__init__(source=source, source_name="Speaker") print(f'[INFO] Listening to sound from Speaker: {self.get_name()} ') self.adjust_for_noise("Default Speaker", - "Please make or play some noise from the Default Speaker...") + "Please play sound from Default Speaker...") def get_name(self): - return self.device_info['name'] + return f'#{self.device_index} - {self.device_info["name"]}' + + def set_device(self, index: int): + """Set active device based on index. 
+ """ + root_logger.info(SpeakerRecorder.set_device.__name__) + with pyaudio.PyAudio() as p: + self.device_index = index + speakers = p.get_device_info_by_index(self.device_index) + + if not speakers["isLoopbackDevice"]: + for loopback in p.get_loopback_device_info_generator(): + if speakers["name"] in loopback["name"]: + speakers = loopback + break + else: + print("[ERROR] No loopback device found.") + + self.device_info = speakers + + source = sr.Microphone(speaker=True, + device_index=speakers["index"], + sample_rate=int(speakers["defaultSampleRate"]), + chunk_size=pyaudio.get_sample_size(pyaudio.paInt16), + channels=speakers["maxInputChannels"]) + self.source = source + print(f'[INFO] Listening to sound from Speaker: {self.get_name()} ') + self.adjust_for_noise("Speaker", + f"Please play sound from selected Speakers {self.get_name()}...") if __name__ == "__main__": diff --git a/GlobalVars.py b/GlobalVars.py index e068787..3caa4a5 100644 --- a/GlobalVars.py +++ b/GlobalVars.py @@ -15,8 +15,8 @@ class TranscriptionGlobals(Singleton.Singleton): """ audio_queue: queue.Queue = None - user_audio_recorder: AudioRecorder.DefaultMicRecorder = None - speaker_audio_recorder: AudioRecorder.DefaultSpeakerRecorder = None + user_audio_recorder: AudioRecorder.MicRecorder = None + speaker_audio_recorder: AudioRecorder.SpeakerRecorder = None # Global for transcription from speaker, microphone transcriber: AudioTranscriber = None # Global for responses from openAI API @@ -33,8 +33,8 @@ def __init__(self, key: str = 'API_KEY'): if self.audio_queue is None: self.audio_queue = queue.Queue() if self.user_audio_recorder is None: - self.user_audio_recorder = AudioRecorder.DefaultMicRecorder() + self.user_audio_recorder = AudioRecorder.MicRecorder() if self.speaker_audio_recorder is None: - self.speaker_audio_recorder = AudioRecorder.DefaultSpeakerRecorder() + self.speaker_audio_recorder = AudioRecorder.SpeakerRecorder() if self.api_key is None: self.api_key = key diff --git a/main.py 
b/main.py index e494e43..1b22eb2 100644 --- a/main.py +++ b/main.py @@ -19,6 +19,8 @@ def main(): + """Primary method to run transcribe + """ # Set up all arguments cmd_args = argparse.ArgumentParser(description='Command Line Arguments for Transcribe', formatter_class=RawTextHelpFormatter) @@ -33,7 +35,7 @@ def main(): cmd_args.add_argument('-m', '--model', action='store', choices=[ 'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large'], default='tiny', - help='Specify the model to use for transcription.' + help='Specify the LLM to use for transcription.' '\nBy default tiny english model is part of the install.' '\ntiny multi-lingual model has to be downloaded from the link ' 'https://drive.google.com/file/d/1M4AFutTmQROaE9xk2jPc5Y4oFRibHhEh/view?usp=drive_link' @@ -59,6 +61,10 @@ def main(): cmd_args.add_argument('-l', '--list_devices', action='store_true', help='List all audio drivers and audio devices on this machine. \ \nUse this list index to select the microphone, speaker device for transcription.') + cmd_args.add_argument('-mi', '--mic_device_index', action='store', default=None, type=int, + help='Device index of the microphone for capturing sound.') + cmd_args.add_argument('-si', '--speaker_device_index', action='store', default=None, type=int, + help='Device index of the speaker for capturing sound.') args = cmd_args.parse_args() # Initiate config @@ -78,6 +84,14 @@ def main(): # Initiate logging log_listener = app_logging.initiate_log(config=config) + if args.mic_device_index is not None: + print('Override default microphone with device specified on command line.') + global_vars.user_audio_recorder.set_device(index=args.mic_device_index) + + if args.speaker_device_index is not None: + print('Override default speaker with device specified on command line.') + global_vars.speaker_audio_recorder.set_device(index=args.speaker_device_index) + try: subprocess.run(["ffmpeg", "-version"], stdout=subprocess.DEVNULL, @@ -109,16 +123,14 @@ def main(): 
root = ctk.CTk() ui_components = ui.create_ui_components(root) transcript_textbox = ui_components[0] - response_textbox = ui_components[1] + global_vars.response_textbox = ui_components[1] update_interval_slider = ui_components[2] update_interval_slider_label = ui_components[3] global_vars.freeze_button = ui_components[4] lang_combobox = ui_components[5] - filemenu = ui_components[6] + global_vars.filemenu = ui_components[6] response_now_button = ui_components[7] - global_vars.filemenu = filemenu - global_vars.response_textbox = response_textbox global_vars.user_audio_recorder.record_into_queue(global_vars.audio_queue) time.sleep(2) @@ -164,8 +176,9 @@ def main(): lang_combobox.configure(command=model.change_lang) ui.update_transcript_ui(global_vars.transcriber, transcript_textbox) - ui.update_response_ui(global_vars.responder, response_textbox, update_interval_slider_label, - update_interval_slider, global_vars.freeze_state) + ui.update_response_ui(global_vars.responder, global_vars.response_textbox, + update_interval_slider_label, update_interval_slider, + global_vars.freeze_state) root.mainloop() log_listener.stop() diff --git a/ui.py b/ui.py index e44064d..f230bde 100644 --- a/ui.py +++ b/ui.py @@ -191,7 +191,7 @@ def create_ui_components(root): freeze_button = ctk.CTkButton(root, text="Suggest Responses Continuously", command=None) freeze_button.grid(row=1, column=1, padx=10, pady=3, sticky="nsew") - response_now_button = ctk.CTkButton(root, text="Suggest Responses Now", command=None) + response_now_button = ctk.CTkButton(root, text="Suggest Response Now", command=None) response_now_button.grid(row=2, column=1, padx=10, pady=3, sticky="nsew") update_interval_slider_label = ctk.CTkLabel(root, text="", font=("Arial", 12), From 4dc2886a28cb560d1c04331ea056e9fa697e850e Mon Sep 17 00:00:00 2001 From: vivek Date: Mon, 21 Aug 2023 09:25:31 -0400 Subject: [PATCH 2/2] typos --- AudioRecorder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/AudioRecorder.py b/AudioRecorder.py index 161a163..cbbd351 100644 --- a/AudioRecorder.py +++ b/AudioRecorder.py @@ -138,7 +138,7 @@ def __init__(self): self.source = source super().__init__(source=source, source_name="You") print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ') - # This line is commented because in case of non defaul microphone it can occasionally take + # This line is commented because in case of non default microphone it can occasionally take # several minutes to execute, thus delaying the start of the application. # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...") @@ -161,7 +161,7 @@ def set_device(self, index: int): ) self.source = source print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ') - # This line is commented because in case of non defaul microphone it can occasionally take + # This line is commented because in case of non default microphone it can occasionally take # several minutes to execute, thus delaying the start of the application. # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")