Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow user to select input, output audio devices #48

Merged
merged 2 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 72 additions & 13 deletions AudioRecorder.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from datetime import datetime
from abc import abstractmethod
import custom_speech_recognition as sr
import pyaudiowpatch as pyaudio
from datetime import datetime
import app_logging as al
from abc import abstractmethod

RECORD_TIMEOUT = 3
ENERGY_THRESHOLD = 1000
Expand Down Expand Up @@ -83,6 +83,8 @@ def print_detailed_audio_info(print_func=print):


class BaseRecorder:
"""Base class for Speaker, Microphone classes
"""
def __init__(self, source, source_name):
root_logger.info(BaseRecorder.__name__)
self.recorder = sr.Recognizer()
Expand Down Expand Up @@ -116,34 +118,63 @@ def record_callback(_, audio: sr.AudioData) -> None:
phrase_time_limit=RECORD_TIMEOUT)


class DefaultMicRecorder(BaseRecorder):
class MicRecorder(BaseRecorder):
"""Encapsulates the Microphone device audio input
"""
def __init__(self):
root_logger.info(DefaultMicRecorder.__name__)
with pyaudio.PyAudio() as p:
root_logger.info(MicRecorder.__name__)
with pyaudio.PyAudio() as py_audio:
# WASAPI is windows specific
wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
default_mic = p.get_device_info_by_index(wasapi_info["defaultInputDevice"])
wasapi_info = py_audio.get_host_api_info_by_type(pyaudio.paWASAPI)
self.device_index = wasapi_info["defaultInputDevice"]
default_mic = py_audio.get_device_info_by_index(self.device_index)

self.device_info = default_mic

source = sr.Microphone(device_index=default_mic["index"],
sample_rate=int(default_mic["defaultSampleRate"]),
channels=default_mic["maxInputChannels"]
)
self.source = source
super().__init__(source=source, source_name="You")
print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ')
# The noise adjustment below is commented out because, for a non-default microphone,
# it can occasionally take several minutes to run, delaying application start-up.
# self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")

def get_name(self):
return self.device_info['name']
return f'#{self.device_index} - {self.device_info["name"]}'

def set_device(self, index: int):
    """Switch this recorder to the microphone with the given device index.

    The device is looked up through PyAudio and a new ``sr.Microphone``
    source is built from its reported index, default sample rate and
    input channel count. Only after both steps succeed are
    ``device_index``, ``device_info`` and ``source`` replaced, so a bad
    index cannot leave the recorder half-updated.

    Args:
        index: PyAudio device index of the microphone to record from.

    Raises:
        Any exception raised by ``get_device_info_by_index`` or by the
        ``sr.Microphone`` constructor is propagated unchanged; the
        previously selected device stays active in that case.
    """
    root_logger.info(MicRecorder.set_device.__name__)
    with pyaudio.PyAudio() as py_audio:
        # Validate the index first: this call is expected to raise for an
        # unknown device (NOTE(review): confirm exception type in PyAudio).
        mic = py_audio.get_device_info_by_index(index)

    source = sr.Microphone(device_index=mic["index"],
                           sample_rate=int(mic["defaultSampleRate"]),
                           channels=mic["maxInputChannels"])

    # Commit the new device atomically now that nothing else can fail.
    self.device_index = index
    self.device_info = mic
    self.source = source
    print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ')
    # Noise adjustment is intentionally disabled: for a non-default microphone it
    # can occasionally take several minutes, delaying the start of the application.
    # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")


class DefaultSpeakerRecorder(BaseRecorder):
class SpeakerRecorder(BaseRecorder):
"""Encapsulates the Speaker device audio input
"""
def __init__(self):
root_logger.info(DefaultSpeakerRecorder.__name__)
root_logger.info(SpeakerRecorder.__name__)
with pyaudio.PyAudio() as p:
wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
self.device_index = wasapi_info["defaultOutputDevice"]
default_speakers = p.get_device_info_by_index(self.device_index)

if not default_speakers["isLoopbackDevice"]:
for loopback in p.get_loopback_device_info_generator():
Expand All @@ -163,10 +194,38 @@ def __init__(self):
super().__init__(source=source, source_name="Speaker")
print(f'[INFO] Listening to sound from Speaker: {self.get_name()} ')
self.adjust_for_noise("Default Speaker",
"Please make or play some noise from the Default Speaker...")
"Please play sound from Default Speaker...")

def get_name(self):
return self.device_info['name']
return f'#{self.device_index} - {self.device_info["name"]}'

def set_device(self, index: int):
    """Switch this recorder to the speaker with the given device index.

    The device is looked up through PyAudio. If its ``isLoopbackDevice``
    flag is false, the loopback devices are searched for one whose name
    contains the selected device's name, and that loopback device is used
    for capture instead. ``device_index``, ``device_info`` and ``source``
    are replaced only after a usable device was resolved and the new
    source was built, then noise adjustment is run on the new source.

    If no matching loopback device exists, an error is printed and the
    method returns without changing the currently active device, rather
    than installing an output device that has no loopback counterpart.

    Args:
        index: PyAudio device index of the speaker to capture from.

    Raises:
        Any exception raised by ``get_device_info_by_index`` or by the
        ``sr.Microphone`` constructor is propagated unchanged; the
        previously selected device stays active in that case.
    """
    root_logger.info(SpeakerRecorder.set_device.__name__)
    with pyaudio.PyAudio() as p:
        # Validate the index before touching any recorder state.
        speakers = p.get_device_info_by_index(index)

        if not speakers["isLoopbackDevice"]:
            for loopback in p.get_loopback_device_info_generator():
                if speakers["name"] in loopback["name"]:
                    speakers = loopback
                    break
            else:
                # for-else: the loop finished without finding a match.
                print("[ERROR] No loopback device found.")
                return

    source = sr.Microphone(speaker=True,
                           device_index=speakers["index"],
                           sample_rate=int(speakers["defaultSampleRate"]),
                           chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
                           channels=speakers["maxInputChannels"])

    # Commit the new device now that lookup and source creation succeeded.
    # device_index keeps the index the caller asked for, while device_info
    # may describe the matching loopback device.
    self.device_index = index
    self.device_info = speakers
    self.source = source
    print(f'[INFO] Listening to sound from Speaker: {self.get_name()} ')
    self.adjust_for_noise("Speaker",
                          f"Please play sound from selected Speakers {self.get_name()}...")


if __name__ == "__main__":
Expand Down
8 changes: 4 additions & 4 deletions GlobalVars.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class TranscriptionGlobals(Singleton.Singleton):
"""

audio_queue: queue.Queue = None
user_audio_recorder: AudioRecorder.DefaultMicRecorder = None
speaker_audio_recorder: AudioRecorder.DefaultSpeakerRecorder = None
user_audio_recorder: AudioRecorder.MicRecorder = None
speaker_audio_recorder: AudioRecorder.SpeakerRecorder = None
# Global for transcription from speaker, microphone
transcriber: AudioTranscriber = None
# Global for responses from openAI API
Expand All @@ -33,8 +33,8 @@ def __init__(self, key: str = 'API_KEY'):
if self.audio_queue is None:
self.audio_queue = queue.Queue()
if self.user_audio_recorder is None:
self.user_audio_recorder = AudioRecorder.DefaultMicRecorder()
self.user_audio_recorder = AudioRecorder.MicRecorder()
if self.speaker_audio_recorder is None:
self.speaker_audio_recorder = AudioRecorder.DefaultSpeakerRecorder()
self.speaker_audio_recorder = AudioRecorder.SpeakerRecorder()
if self.api_key is None:
self.api_key = key
27 changes: 20 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@


def main():
"""Primary method to run transcribe
"""
# Set up all arguments
cmd_args = argparse.ArgumentParser(description='Command Line Arguments for Transcribe',
formatter_class=RawTextHelpFormatter)
Expand All @@ -33,7 +35,7 @@ def main():
cmd_args.add_argument('-m', '--model', action='store', choices=[
'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large'],
default='tiny',
help='Specify the model to use for transcription.'
help='Specify the LLM to use for transcription.'
'\nBy default tiny english model is part of the install.'
'\ntiny multi-lingual model has to be downloaded from the link '
'https://drive.google.com/file/d/1M4AFutTmQROaE9xk2jPc5Y4oFRibHhEh/view?usp=drive_link'
Expand All @@ -59,6 +61,10 @@ def main():
cmd_args.add_argument('-l', '--list_devices', action='store_true',
help='List all audio drivers and audio devices on this machine. \
\nUse this list index to select the microphone, speaker device for transcription.')
cmd_args.add_argument('-mi', '--mic_device_index', action='store', default=None, type=int,
help='Device index of the microphone for capturing sound.')
cmd_args.add_argument('-si', '--speaker_device_index', action='store', default=None, type=int,
help='Device index of the speaker for capturing sound.')
args = cmd_args.parse_args()

# Initiate config
Expand All @@ -78,6 +84,14 @@ def main():
# Initiate logging
log_listener = app_logging.initiate_log(config=config)

if args.mic_device_index is not None:
print('Override default microphone with device specified on command line.')
global_vars.user_audio_recorder.set_device(index=args.mic_device_index)

if args.speaker_device_index is not None:
print('Override default speaker with device specified on command line.')
global_vars.speaker_audio_recorder.set_device(index=args.speaker_device_index)

try:
subprocess.run(["ffmpeg", "-version"],
stdout=subprocess.DEVNULL,
Expand Down Expand Up @@ -109,16 +123,14 @@ def main():
root = ctk.CTk()
ui_components = ui.create_ui_components(root)
transcript_textbox = ui_components[0]
response_textbox = ui_components[1]
global_vars.response_textbox = ui_components[1]
update_interval_slider = ui_components[2]
update_interval_slider_label = ui_components[3]
global_vars.freeze_button = ui_components[4]
lang_combobox = ui_components[5]
filemenu = ui_components[6]
global_vars.filemenu = ui_components[6]
response_now_button = ui_components[7]

global_vars.filemenu = filemenu
global_vars.response_textbox = response_textbox
global_vars.user_audio_recorder.record_into_queue(global_vars.audio_queue)

time.sleep(2)
Expand Down Expand Up @@ -164,8 +176,9 @@ def main():
lang_combobox.configure(command=model.change_lang)

ui.update_transcript_ui(global_vars.transcriber, transcript_textbox)
ui.update_response_ui(global_vars.responder, response_textbox, update_interval_slider_label,
update_interval_slider, global_vars.freeze_state)
ui.update_response_ui(global_vars.responder, global_vars.response_textbox,
update_interval_slider_label, update_interval_slider,
global_vars.freeze_state)

root.mainloop()
log_listener.stop()
Expand Down
2 changes: 1 addition & 1 deletion ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def create_ui_components(root):
freeze_button = ctk.CTkButton(root, text="Suggest Responses Continuously", command=None)
freeze_button.grid(row=1, column=1, padx=10, pady=3, sticky="nsew")

response_now_button = ctk.CTkButton(root, text="Suggest Responses Now", command=None)
response_now_button = ctk.CTkButton(root, text="Suggest Response Now", command=None)
response_now_button.grid(row=2, column=1, padx=10, pady=3, sticky="nsew")

update_interval_slider_label = ctk.CTkLabel(root, text="", font=("Arial", 12),
Expand Down