Continuous mode not working #148

Closed · wants to merge 40 commits

Commits (40)
9fde685
Update README.md for Transcribe
vivekuppal Jun 29, 2023
1483fea
Merge pull request #1 from vivekuppal/vu-readme-updates
vivekuppal Jun 29, 2023
391d728
Allow usage without a valid OPEN API key. (#2)
vivekuppal Jun 29, 2023
ab4245d
Update README.md (#3)
vivekuppal Jun 29, 2023
ebb6f2f
Allow user to choose model. Add arguments to main file.
vivekuppal Jun 29, 2023
8f5a595
Code clean up, add linting. (#4)
vivekuppal Jun 30, 2023
59d5c91
UI Text Chronology (#5)
vivekuppal Jun 30, 2023
f772bb8
Update readme with Enhancements. Allow copy of text from UI window. R…
vivekuppal Jun 30, 2023
87a38b1
Save conversation to text. (#9)
vivekuppal Jun 30, 2023
65d6dcf
Add Contextual Information to Responses (#11)
vivekuppal Jun 30, 2023
d1b3c45
Allow users to pause audio transcription. Change the default for gett…
vivekuppal Jul 3, 2023
cfca51a
Update main.py (#15)
abhinavuppal1 Jul 11, 2023
152bad3
Code reorg to separate UI code (#16)
vivekuppal Jul 12, 2023
addf17f
Add support for multiple languages (#18)
vivekuppal Jul 12, 2023
e5cda88
Easy install for non developers on windows (#20)
vivekuppal Jul 18, 2023
9896c1c
Disabled winrar UI (#22)
Adarsha-gg Jul 18, 2023
901501b
When using API, we do not need to specify language, absorb the lang p…
vivekuppal Jul 18, 2023
bd48b61
Language combo fix (#26)
Adarsha-gg Jul 19, 2023
7c9ca88
Added gdrive (#27)
Adarsha-gg Jul 19, 2023
2429c97
Allow usage of API Key in installed version of Transcribe (#28)
vivekuppal Jul 19, 2023
12ef846
updated the drive link (#30)
Adarsha-gg Jul 20, 2023
4be26c7
Add a duration class to easily measure the time taken for an operatio…
vivekuppal Jul 21, 2023
6e53b31
--api option was not working correctly (#34)
vivekuppal Jul 21, 2023
bd42b8c
Initial unit tests for the speech recognition library (#36)
vivekuppal Jul 24, 2023
af87eff
user reported defect fixes. (#39)
vivekuppal Jul 26, 2023
26cfaad
Optimize LLM usage (#40)
vivekuppal Jul 26, 2023
f8d5857
Bug fixes for exceptions observed during usage. Add further plumbing …
vivekuppal Jul 27, 2023
1356a78
Add logging infrastructure (#42)
vivekuppal Jul 27, 2023
a1cc48b
Get Response from LLM on demand (#44)
vivekuppal Jul 28, 2023
ea5f392
Models from open ai site (#43)
Adarsha-gg Jul 28, 2023
b4e03a4
List all active devices (#45)
vivekuppal Aug 1, 2023
85d09ed
Allow user to select input, output audio devices (#48)
vivekuppal Aug 21, 2023
28d1e9a
Disable mic speaker selectively (#49)
vivekuppal Aug 23, 2023
e48bdb8
Add Audio Response for LLM generated content (#50)
vivekuppal Aug 27, 2023
6baa77f
Update, upload latest binaries (#54)
Adarsha-gg Aug 30, 2023
fa55416
Multiturn prompts, bug fixes (#55)
vivekuppal Sep 5, 2023
ce5a1e1
Allow enable/disable speaker and microphone from UI (#56)
Adarsha-gg Sep 6, 2023
e445856
Update gdrive link (#58)
Adarsha-gg Sep 7, 2023
b50f58c
Bring readme up to date with current functionality. Describe content …
vivekuppal Sep 8, 2023
a7ea2cc
Continuous mode broke after updates to the UI.
vivekuppal Sep 8, 2023
List all active devices (#45)
* List all audio drivers and devices (speakers, microphones) on the machine.
* Add info about the mic and speaker currently in use (see the usage sketch below).
vivekuppal committed Aug 1, 2023
commit b4e03a40b70813c04fa25fb8641cd2864817214e
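A quick way to exercise this change is to run the module directly. A minimal sketch, assuming Windows with pyaudiowpatch installed (main.py imports the module the same way in this commit):

import AudioRecorder as ar

# Tabulate audio drivers (host APIs) and devices, including WASAPI
# loopback devices that can be used to capture speaker output.
ar.print_detailed_audio_info()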
117 changes: 115 additions & 2 deletions AudioRecorder.py
@@ -2,6 +2,7 @@
import pyaudiowpatch as pyaudio
from datetime import datetime
import app_logging as al
from abc import abstractmethod

RECORD_TIMEOUT = 3
ENERGY_THRESHOLD = 1000
@@ -10,6 +11,77 @@
root_logger = al.get_logger()


# https://people.csail.mit.edu/hubert/pyaudio/docs/#id6
driver_type = {
    -1: 'Not actually an audio device',
    0: 'Still in development',
    1: 'DirectSound (Windows only)',
    2: 'Multimedia Extension (Windows only)',
    3: 'Steinberg Audio Stream Input/Output',
    4: 'SoundManager (OSX only)',
    5: 'CoreAudio (OSX only)',
    7: 'Open Sound System (Linux only)',
    8: 'Advanced Linux Sound Architecture (Linux only)',
    9: 'Open Audio Library',
    10: 'BeOS Sound System',
    11: 'Windows Driver Model (Windows only)',
    12: 'JACK Audio Connection Kit',
    13: 'Windows Vista Audio stack architecture'
}


def print_detailed_audio_info(print_func=print):
    """
    Print information about Host APIs and devices,
    using `print_func`.

    :param print_func: Print function (or wrapper)
    :type print_func: function
    :rtype: None
    """
    print_func("\n|", "~ Audio Drivers on this machine ~".center(20), "|\n")
    header = f" ^ #{'INDEX'.center(7)}#{'DRIVER TYPE'.center(13)}#{'DEVICE COUNT'.center(15)}#{'NAME'.center(5)}"
    print_func(header)
    print_func("-"*len(header))
    py_audio = pyaudio.PyAudio()
    for host_api in py_audio.get_host_api_info_generator():
        print_func(
            (
                f" » "
                f"{('['+str(host_api['index'])+']').center(8)}|"
                f"{str(host_api['type']).center(13)}|"
                f"{str(host_api['deviceCount']).center(15)}|"
                f" {host_api['name']}"
            )
        )

    print_func("\n\n\n|", "~ Audio Devices on this machine ~".center(20), "|\n")
    header = f" ^ #{'INDEX'.center(7)}# HOST API INDEX #{'LOOPBACK'.center(10)}#{'NAME'.center(5)}"
    print_func(header)
    print_func("-"*len(header))
    for device in py_audio.get_device_info_generator():
        print_func(
            (
                f" » "
                f"{('['+str(device['index'])+']').center(8)}"
                f"{str(device['hostApi']).center(16)}"
                f" {str(device['isLoopbackDevice']).center(10)}"
                f" {device['name']}"
            )
        )

    # Below statements are useful to view all available fields in the
    # driver and device list.
    # Do not remove these statements from here.
    # print('Windows Audio Drivers')
    # for host_api_info_gen in py_audio.get_host_api_info_generator():
    #     print(host_api_info_gen)

    # print('Windows Audio Devices')
    # for device_info_gen in py_audio.get_device_info_generator():
    #     print(device_info_gen)


class BaseRecorder:
    def __init__(self, source, source_name):
        root_logger.info(BaseRecorder.__name__)
@@ -23,6 +95,11 @@ def __init__(self, source, source_name):
        self.source = source
        self.source_name = source_name

    @abstractmethod
    def get_name(self):
        """Get the name of this device
        """

    def adjust_for_noise(self, device_name, msg):
        root_logger.info(BaseRecorder.adjust_for_noise.__name__)
        print(f"[INFO] Adjusting for ambient noise from {device_name}. " + msg)
@@ -42,8 +119,23 @@ def record_callback(_, audio: sr.AudioData) -> None:
class DefaultMicRecorder(BaseRecorder):
    def __init__(self):
        root_logger.info(DefaultMicRecorder.__name__)
        super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
        self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
        with pyaudio.PyAudio() as p:
            # WASAPI is windows specific
            wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
            default_mic = p.get_device_info_by_index(wasapi_info["defaultInputDevice"])

        self.device_info = default_mic

        source = sr.Microphone(device_index=default_mic["index"],
                               sample_rate=int(default_mic["defaultSampleRate"]),
                               channels=default_mic["maxInputChannels"]
                               )
        super().__init__(source=source, source_name="You")
        print(f'[INFO] Listening to sound from Microphone: {self.get_name()} ')
        # self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")

    def get_name(self):
        return self.device_info['name']


class DefaultSpeakerRecorder(BaseRecorder):
@@ -61,11 +153,32 @@ def __init__(self):
                else:
                    print("[ERROR] No loopback device found.")

        self.device_info = default_speakers

        source = sr.Microphone(speaker=True,
                               device_index=default_speakers["index"],
                               sample_rate=int(default_speakers["defaultSampleRate"]),
                               chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
                               channels=default_speakers["maxInputChannels"])
        super().__init__(source=source, source_name="Speaker")
        print(f'[INFO] Listening to sound from Speaker: {self.get_name()} ')
        self.adjust_for_noise("Default Speaker",
                              "Please make or play some noise from the Default Speaker...")

    def get_name(self):
        return self.device_info['name']


if __name__ == "__main__":
    print_detailed_audio_info()
    # Below statements are useful to view all available fields in the
    # default Input Device.
    # Do not delete these lines
    # with pyaudio.PyAudio() as p:
    #     wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
    #     print(wasapi_info)

    # with pyaudio.PyAudio() as p:
    #     wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
    #     default_mic = p.get_device_info_by_index(wasapi_info["defaultInputDevice"])
    #     print(default_mic)
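For context on the DefaultSpeakerRecorder change above: pyaudiowpatch exposes a loopback twin for each WASAPI render device, and the recorder captures speaker output by opening that twin as an input. A hedged sketch of the lookup, under the assumption that the twin is matched by name (device names are machine-specific):

import pyaudiowpatch as pyaudio

with pyaudio.PyAudio() as p:
    wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
    default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
    if not default_speakers.get("isLoopbackDevice"):
        # Find the loopback twin of the default output device by name.
        for loopback in p.get_loopback_device_info_generator():
            if default_speakers["name"] in loopback["name"]:
                default_speakers = loopback
                break
    print(f"Capturing from: {default_speakers['name']}")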
7 changes: 5 additions & 2 deletions custom_speech_recognition/__init__.py
@@ -89,6 +89,7 @@ def __init__(self, device_index=None, sample_rate=None, chunk_size=1024,
        assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
        assert sample_rate is None or (isinstance(sample_rate, int) and sample_rate > 0), "Sample rate must be None or a positive integer"
        assert isinstance(chunk_size, int) and chunk_size > 0, "Chunk size must be a positive integer"
        self.device_info = None

        # set up PyAudio
        self.speaker = speaker
@@ -131,7 +132,7 @@ def get_pyaudio():
        return pyaudio

    @staticmethod
    def list_microphone_names():
    def list_microphone_names() -> list:
        """
        Returns a list of the names of all available microphones. For microphones where
        the name can't be retrieved, the list entry contains ``None`` instead.
@@ -151,7 +152,7 @@ def list_microphone_names():
        return result

    @staticmethod
    def list_working_microphones():
    def list_working_microphones() -> dict:
        """
        Returns a dictionary mapping device indices to microphone names,
        for microphones that are currently hearing sounds. When using this function,
@@ -162,6 +163,8 @@ def list_working_microphones():
        constructor to use that microphone. For example, if the return value
        is ``{3: "HDA Intel PCH: ALC3232 Analog (hw:1,0)"}``, you can do
        ``Microphone(device_index=3)`` to use that microphone.

        Note: at present this method can take several minutes to return.
        """
        pyaudio_module = Microphone.get_pyaudio()
        audio = pyaudio_module.PyAudio()
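As the docstring notes, the indices returned by list_working_microphones() plug directly into the Microphone constructor. A hedged sketch (assuming the vendored custom_speech_recognition package exports Microphone, as the diff above suggests):

from custom_speech_recognition import Microphone

working = Microphone.list_working_microphones()  # can take several minutes
if working:
    index, name = next(iter(working.items()))
    print(f"Using microphone {index}: {name}")
    mic = Microphone(device_index=index)
else:
    print("No working microphones detected.")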
70 changes: 40 additions & 30 deletions main.py
@@ -8,6 +8,7 @@
import customtkinter as ctk
from AudioTranscriber import AudioTranscriber
from GPTResponder import GPTResponder
import AudioRecorder as ar
import TranscriberModels
import interactions
import ui
@@ -24,41 +25,50 @@ def main():
    cmd_args.add_argument('-a', '--api', action='store_true',
                          help='Use the online Open AI API for transcription.\
                          \nThis option requires an API KEY and will consume Open AI credits.')
    cmd_args.add_argument('-e', '--experimental', action='store_true',
                          help='Experimental command line argument. Behavior is undefined.')
    cmd_args.add_argument('-k', '--api_key', action='store', default=None,
                          help='API Key for accessing OpenAI APIs. This is an optional parameter.\
                          Without the API Key only transcription works.')
    cmd_args.add_argument('-m', '--model', action='store', choices=['tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large'],
                          default='tiny',
                          help='Specify the model to use for transcription.'
                          '\nBy default tiny english model is part of the install.'
                          '\ntiny multi-lingual model has to be downloaded from the link \
                          https://drive.google.com/file/d/1M4AFutTmQROaE9xk2jPc5Y4oFRibHhEh/view?usp=drive_link'
                          '\nbase english model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt'
                          '\nbase multi-lingual model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt'
                          '\nsmall english model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt'
                          '\nsmall multi-lingual model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt'
                          '\n The models below require higher computing power:'
                          '\nmedium english model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt'
                          '\nmedium multi-lingual model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt'
                          '\nlarge model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt'
                          '\nlarge-v1 model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt'
                          '\nlarge-v2 model has to be downloaded from the link \
                          https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt')
    cmd_args.add_argument('-e', '--experimental', action='store_true', help='Experimental command\
                          line argument. Behavior is undefined.')
                          \nWithout the API Key only transcription works.')
    cmd_args.add_argument('-m', '--model', action='store', choices=[
                          'tiny', 'base', 'small', 'medium', 'large-v1', 'large-v2', 'large'],
                          default='tiny',
                          help='Specify the model to use for transcription.'
                          '\nBy default tiny english model is part of the install.'
                          '\ntiny multi-lingual model has to be downloaded from the link '
                          'https://drive.google.com/file/d/1M4AFutTmQROaE9xk2jPc5Y4oFRibHhEh/view?usp=drive_link'
                          '\nbase english model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt'
                          '\nbase multi-lingual model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt'
                          '\nsmall english model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt'
                          '\nsmall multi-lingual model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt'
                          '\n\nThe models below require higher computing power: \n\n'
                          '\nmedium english model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt'
                          '\nmedium multi-lingual model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt'
                          '\nlarge model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt'
                          '\nlarge-v1 model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt'
                          '\nlarge-v2 model has to be downloaded from the link '
                          'https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt')
    cmd_args.add_argument('-l', '--list_devices', action='store_true',
                          help='List all audio drivers and audio devices on this machine. \
                          \nUse this list index to select the microphone, speaker device for transcription.')
    args = cmd_args.parse_args()

    # Initiate config
    config = configuration.Config().get_data()

    if args.list_devices:
        print('\n\nList all audio drivers and devices on this machine')
        ar.print_detailed_audio_info()
        return

    # Initiate global variables
    # Two calls to GlobalVars.TranscriptionGlobals is on purpose
    global_vars = GlobalVars.TranscriptionGlobals()
@@ -88,9 +98,9 @@ def main():

    # Command line arg for api_key takes preference over api_key specified in parameters.yaml file
    if args.api_key is not None:
        api_key = args.api_key
        api_key: str = args.api_key
    else:
        api_key = config['OpenAI']['api_key']
        api_key: str = config['OpenAI']['api_key']

    global_vars.api_key = api_key
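Taken together, these changes let device enumeration run without starting a transcription session: python main.py --list_devices prints the driver and device tables and returns, and the printed indices can then be used to select the microphone or speaker device for transcription.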