Continuous mode not working #148

Closed
wants to merge 40 commits into from
Changes from 1 commit
40 commits
9fde685
Update README.md for Transcribe
vivekuppal Jun 29, 2023
1483fea
Merge pull request #1 from vivekuppal/vu-readme-updates
vivekuppal Jun 29, 2023
391d728
Allow usage without a valid OPEN API key. (#2)
vivekuppal Jun 29, 2023
ab4245d
Update README.md (#3)
vivekuppal Jun 29, 2023
ebb6f2f
Allow user to choose model. Add arguments to main file.
vivekuppal Jun 29, 2023
8f5a595
Code clean up, add linting. (#4)
vivekuppal Jun 30, 2023
59d5c91
UI Text Chronology (#5)
vivekuppal Jun 30, 2023
f772bb8
Update readme with Enhancements. Allow copy of text from UI window. R…
vivekuppal Jun 30, 2023
87a38b1
Save conversation to text. (#9)
vivekuppal Jun 30, 2023
65d6dcf
Add Contextual Information to Responses (#11)
vivekuppal Jun 30, 2023
d1b3c45
Allow users to pause audio transcription. Change the default for gett…
vivekuppal Jul 3, 2023
cfca51a
Update main.py (#15)
abhinavuppal1 Jul 11, 2023
152bad3
Code reorg to separate UI code (#16)
vivekuppal Jul 12, 2023
addf17f
Add support for multiple languages (#18)
vivekuppal Jul 12, 2023
e5cda88
Easy install for non developers on windows (#20)
vivekuppal Jul 18, 2023
9896c1c
Disabled winrar UI (#22)
Adarsha-gg Jul 18, 2023
901501b
When using API, we do not need to specify language, absorb the lang p…
vivekuppal Jul 18, 2023
bd48b61
Language combo fix (#26)
Adarsha-gg Jul 19, 2023
7c9ca88
Added gdrive (#27)
Adarsha-gg Jul 19, 2023
2429c97
Allow usage of API Key in installed version of Transcribe (#28)
vivekuppal Jul 19, 2023
12ef846
updated the drive link (#30)
Adarsha-gg Jul 20, 2023
4be26c7
Add a duration class to easily measure the time taken for an operatio…
vivekuppal Jul 21, 2023
6e53b31
--api option was not working correctly (#34)
vivekuppal Jul 21, 2023
bd42b8c
Initial unit tests for the speech recognition library (#36)
vivekuppal Jul 24, 2023
af87eff
user reported defect fixes. (#39)
vivekuppal Jul 26, 2023
26cfaad
Optimize LLM usage (#40)
vivekuppal Jul 26, 2023
f8d5857
Bug fixes for exceptions observed during usage. Add further plumbing …
vivekuppal Jul 27, 2023
1356a78
Add logging infrastructure (#42)
vivekuppal Jul 27, 2023
a1cc48b
Get Response from LLM on demand (#44)
vivekuppal Jul 28, 2023
ea5f392
Models from open ai site (#43)
Adarsha-gg Jul 28, 2023
b4e03a4
List all active devices (#45)
vivekuppal Aug 1, 2023
85d09ed
Allow user to select input, output audio devices (#48)
vivekuppal Aug 21, 2023
28d1e9a
Disable mic speaker selectively (#49)
vivekuppal Aug 23, 2023
e48bdb8
Add Audio Response for LLM generated content (#50)
vivekuppal Aug 27, 2023
6baa77f
Update, upload latest binaries (#54)
Adarsha-gg Aug 30, 2023
fa55416
Multiturn prompts, bug fixes (#55)
vivekuppal Sep 5, 2023
ce5a1e1
Allow enable/disable speaker and microphone from UI (#56)
Adarsha-gg Sep 6, 2023
e445856
Update gdrive link (#58)
Adarsha-gg Sep 7, 2023
b50f58c
Bring readme up to date with current functionality. Describe content …
vivekuppal Sep 8, 2023
a7ea2cc
Continuous mode broke after updates to the UI.
vivekuppal Sep 8, 2023
Code clean up, add linting. (#4)
Update readme for addition of new models.
Update license agreement for Transcribe.
Add linting.
Code clean up
vivekuppal committed Jun 30, 2023
commit 8f5a595af03e480f817b25861fa71c38cb5293ae
2 changes: 2 additions & 0 deletions .flake8
@@ -0,0 +1,2 @@
[flake8]
max-line-length = 120
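The new `.flake8` file is a standard INI-style configuration. A minimal sketch of how such a section parses, using only the standard library rather than flake8 itself:

```python
import configparser

# Parse the same two lines the commit adds to .flake8
config = configparser.ConfigParser()
config.read_string("[flake8]\nmax-line-length = 120\n")

# flake8 reads this key to set its per-line character limit
limit = config.getint("flake8", "max-line-length")
print(limit)  # 120
```

With this in place, lines longer than 120 characters are reported as style violations when the linter runs over the repository.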
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2023 SevaSk
Copyright (c) 2023 Vivek Uppal

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
13 changes: 11 additions & 2 deletions README.md
@@ -12,6 +12,15 @@ Follow these steps to set up and run transcribe on your local machine.
- Python >=3.8.0
- (Optional) An OpenAI API key that can access OpenAI API (set up a paid account OpenAI account)
- Windows OS (Not tested on others)
- Git LFS

Install [Git LFS](https://git-lfs.com/)

Run the command
```
git lfs install
```

- FFmpeg

If FFmpeg is not installed in your system, follow the steps below to install it.
@@ -81,9 +90,9 @@ The --api flag will use the whisper api for transcriptions. This significantly e

### ⚠️ Limitations

Transcribe provides real-time transcription and optional response suggestions, there are several known limitations to its functionality that you should be aware of:
While Transcribe provides real-time transcription and optional response suggestions, there are several known limitations to its functionality that you should be aware of:

**Default Mic and Speaker:** Transcribe is configured to listen only to the default microphone and speaker set. It does not detect sound from other devices or systems. To use a different mic or speaker, need to set it as your default device in your system settings.
**Default Mic and Speaker:** Transcribe is currently configured to listen only to the default microphone and speaker set in your system. It will not detect sound from other devices or systems. To use a different mic or speaker, need to set it as your default device in your system settings.

**Whisper Model**: If the --api flag is not used, we utilize the 'tiny' version of the Whisper ASR model, due to its low resource consumption and fast response times. However, this model may not be as accurate as the larger models in transcribing certain types of speech, including accents or uncommon words.

43 changes: 25 additions & 18 deletions TranscriberModels.py
@@ -3,45 +3,52 @@
import os
import torch

def get_model(use_api: bool, model: str=None):

def get_model(use_api: bool, model: str = None):
if use_api:
return APIWhisperTranscriber()
else:
model_cleaned = model if model else 'tiny'
print(f'Using model: {model_cleaned}')
return WhisperTranscriber(model=model_cleaned)

model_cleaned = model if model else 'tiny'
print(f'Using model: {model_cleaned}')
return WhisperTranscriber(model=model_cleaned)


class WhisperTranscriber:
def __init__(self, model: str='tiny'):
def __init__(self, model: str = 'tiny'):
model_filename = model + '.en.pt'

if not os.path.isfile(model_filename):
print(f'Could not find the model file: {model_filename}')
print(f'Download the model file and add it to the directory: {os.getcwd()}')
print(f'small model is available at: https://drive.google.com/file/d/1E44DVjpfZX8tSrSagaDJXU91caZOkwa6/view?usp=drive_link')
print(f'base model is available at: https://drive.google.com/file/d/1E44DVjpfZX8tSrSagaDJXU91caZOkwa6/view?usp=drive_link')
print(f'Download the model file and add it to the directory: \
{os.getcwd()}')
print('small model is available at: \
https://drive.google.com/file/d/1E44DVjpfZX8tSrSagaDJXU91caZOkwa6/view?usp=drive_link')
print('base model is available at: \
https://drive.google.com/file/d/1E44DVjpfZX8tSrSagaDJXU91caZOkwa6/view?usp=drive_link')
exit()

self.audio_model = whisper.load_model(os.path.join(os.getcwd(), model_filename))
print(f"[INFO] Whisper using GPU: " + str(torch.cuda.is_available()))
self.audio_model = whisper.load_model(os.path.join(os.getcwd(),
model_filename))
print(f'[INFO] Whisper using GPU: {str(torch.cuda.is_available())}')

def get_transcription(self, wav_file_path):
try:
result = self.audio_model.transcribe(wav_file_path, fp16=torch.cuda.is_available())
except Exception as e:
print(e)
except Exception as exception:
print(exception)
return ''
return result['text'].strip()



class APIWhisperTranscriber:
def __init__(self, model: str='tiny'):
def __init__(self):
print('Using Open AI API for transcription.')

def get_transcription(self, wav_file_path):
try:
with open(wav_file_path, "rb") as audio_file:
result = openai.Audio.transcribe("whisper-1", audio_file)
except Exception as e:
print(e)
except Exception as exception:
print(exception)
return ''
return result['text'].strip()
return result['text'].strip()
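The refactored `get_model` above is a small factory with an early return and a default fallback. Its selection logic can be sketched standalone — the stub classes below merely stand in for the real transcribers, which need the whisper and openai packages:

```python
class APIWhisperTranscriber:
    """Stub standing in for the real API-backed transcriber."""
    def __init__(self):
        self.name = 'api'

class WhisperTranscriber:
    """Stub standing in for the real local Whisper transcriber."""
    def __init__(self, model: str = 'tiny'):
        self.name = model

def get_model(use_api: bool, model: str = None):
    # Mirror the early-return structure from the commit:
    # API mode short-circuits, local mode falls back to 'tiny'
    if use_api:
        return APIWhisperTranscriber()
    model_cleaned = model if model else 'tiny'
    return WhisperTranscriber(model=model_cleaned)

print(get_model(False, None).name)    # tiny
print(get_model(False, 'base').name)  # base
```

Flattening the `else` branch this way (rather than nesting it, as in the original) is what the linting pass above is cleaning up.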
99 changes: 59 additions & 40 deletions custom_speech_recognition/audio.py
@@ -99,23 +99,23 @@ def get_raw_data(self, convert_rate=None, convert_width=None):

# convert samples to desired sample width if specified
if convert_width is not None and self.sample_width != convert_width:
if (
convert_width == 3
): # we're converting the audio into 24-bit (workaround for https://bugs.python.org/issue12866)
# we're converting the audio into 24-bit (workaround https://bugs.python.org/issue12866)
if convert_width == 3:
raw_data = audioop.lin2lin(
raw_data, self.sample_width, 4
) # convert audio into 32-bit first, which is always supported
try:
audioop.bias(
b"", 3, 0
) # test whether 24-bit audio is supported (for example, ``audioop`` in Python 3.3 and below don't support sample width 3, while Python 3.4+ do)
except (
audioop.error
): # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less)
# test whether 24-bit audio is supported (for example, ``audioop`` in Python 3.3
# and below don't support sample width 3, while Python 3.4+ do)
audioop.bias(b"", 3, 0)
except audioop.error:
# this version of audioop doesn't support 24-bit audio
# since we're in little endian, we discard the first byte from each 32-bit
# sample to get a 24-bit sample
raw_data = b"".join(
raw_data[i + 1 : i + 4]
raw_data[i + 1: i + 4]
for i in range(0, len(raw_data), 4)
) # since we're in little endian, we discard the first byte from each 32-bit sample to get a 24-bit sample
)
else: # 24-bit audio fully supported, we don't need to shim anything
raw_data = audioop.lin2lin(
raw_data, self.sample_width, convert_width
@@ -125,23 +125,28 @@ def get_raw_data(self, convert_rate=None, convert_width=None):
raw_data, self.sample_width, convert_width
)

# if the output is 8-bit audio with unsigned samples, convert the samples we've been treating as signed to unsigned again
# if the output is 8-bit audio with unsigned samples, convert the samples we've been treating
# as signed to unsigned again
if convert_width == 1:
raw_data = audioop.bias(
raw_data, 1, 128
) # add 128 to every sample to make them act like unsigned samples again

return raw_data

def get_wav_data(self, convert_rate=None, convert_width=None, nchannels = 1):
def get_wav_data(self, convert_rate=None, convert_width=None, nchannels=1):
"""
Returns a byte string representing the contents of a WAV file containing the audio represented by the ``AudioData`` instance.
Returns a byte string representing the contents of a WAV file containing the audio
represented by the ``AudioData`` instance.

If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.
If ``convert_width`` is specified and the audio samples are not ``convert_width``
bytes each, the resulting audio is converted to match.

If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.
If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz,
the resulting audio is resampled to match.

Writing these bytes directly to a file results in a valid `WAV file <https://en.wikipedia.org/wiki/WAV>`__.
Writing these bytes directly to a file results in a valid `WAV file
<https://en.wikipedia.org/wiki/WAV>`__.
"""
raw_data = self.get_raw_data(convert_rate, convert_width)
sample_rate = (
@@ -166,13 +171,17 @@ def get_wav_data(self, convert_rate=None, convert_width=None, nchannels = 1):

def get_aiff_data(self, convert_rate=None, convert_width=None):
"""
Returns a byte string representing the contents of an AIFF-C file containing the audio represented by the ``AudioData`` instance.
Returns a byte string representing the contents of an AIFF-C file containing the audio
represented by the ``AudioData`` instance.

If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.
If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes
each, the resulting audio is converted to match.

If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.
If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz,
the resulting audio is resampled to match.

Writing these bytes directly to a file results in a valid `AIFF-C file <https://en.wikipedia.org/wiki/Audio_Interchange_File_Format>`__.
Writing these bytes directly to a file results in a valid `AIFF-C file
<https://en.wikipedia.org/wiki/Audio_Interchange_File_Format>`__.
"""
raw_data = self.get_raw_data(convert_rate, convert_width)
sample_rate = (
@@ -182,14 +191,17 @@ def get_aiff_data(self, convert_rate=None, convert_width=None):
self.sample_width if convert_width is None else convert_width
)

# the AIFF format is big-endian, so we need to convert the little-endian raw data to big-endian
# the AIFF format is big-endian, so we need to convert the little-endian
# raw data to big-endian
if hasattr(
audioop, "byteswap"
): # ``audioop.byteswap`` was only added in Python 3.4
raw_data = audioop.byteswap(raw_data, sample_width)
else: # manually reverse the bytes of each sample, which is slower but works well enough as a fallback
raw_data = raw_data[sample_width - 1 :: -1] + b"".join(
raw_data[i + sample_width : i : -1]
else:
# manually reverse the bytes of each sample, which is slower but
# works well enough as a fallback
raw_data = raw_data[sample_width - 1:: -1] + b"".join(
raw_data[i + sample_width: i: -1]
for i in range(sample_width - 1, len(raw_data), sample_width)
)

@@ -208,31 +220,35 @@ def get_aiff_data(self, convert_rate=None, convert_width=None):

def get_flac_data(self, convert_rate=None, convert_width=None):
"""
Returns a byte string representing the contents of a FLAC file containing the audio represented by the ``AudioData`` instance.
Returns a byte string representing the contents of a FLAC file containing the audio
represented by the ``AudioData`` instance.

Note that 32-bit FLAC is not supported. If the audio data is 32-bit and ``convert_width`` is not specified, then the resulting FLAC will be a 24-bit FLAC.
Note that 32-bit FLAC is not supported. If the audio data is 32-bit and ``convert_width``
is not specified, then the resulting FLAC will be a 24-bit FLAC.

If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.
If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz,
the resulting audio is resampled to match.

If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.
If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes
each, the resulting audio is converted to match.

Writing these bytes directly to a file results in a valid `FLAC file <https://en.wikipedia.org/wiki/FLAC>`__.
Writing these bytes directly to a file results in a valid
`FLAC file <https://en.wikipedia.org/wiki/FLAC>`__.
"""
assert convert_width is None or (
convert_width % 1 == 0 and 1 <= convert_width <= 3
), "Sample width to convert to must be between 1 and 3 inclusive"

if (
self.sample_width > 3 and convert_width is None
): # resulting WAV data would be 32-bit, which is not convertable to FLAC using our encoder
convert_width = 3 # the largest supported sample width is 24-bit, so we'll limit the sample width to that
if self.sample_width > 3 and convert_width is None:
# resulting WAV data would be 32-bit, which is not convertable to FLAC using our encoder
# the largest supported sample width is 24-bit, so we'll limit the sample width to that
convert_width = 3

# run the FLAC converter with the WAV data to get the FLAC data
wav_data = self.get_wav_data(convert_rate, convert_width)
flac_converter = get_flac_converter()
if (
os.name == "nt"
): # on Windows, specify that the process is to be started without showing a console window
# on Windows, specify that the process is to be started without showing a console window
if os.name == "nt":
startup_info = subprocess.STARTUPINFO()
startup_info.dwFlags |= (
subprocess.STARTF_USESHOWWINDOW
@@ -254,12 +270,13 @@ def get_flac_data(self, convert_rate=None, convert_width=None):
stdout=subprocess.PIPE,
startupinfo=startup_info,
)
flac_data, stderr = process.communicate(wav_data)
flac_data, stderr=process.communicate(wav_data)
return flac_data


def get_flac_converter():
"""Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
"""Returns the absolute path of a FLAC converter executable, or raises
an OSError if none can be found."""
flac_converter = shutil_which("flac") # check for installed version first
if flac_converter is None: # flac utility is not installed
base_path = os.path.dirname(
@@ -288,7 +305,9 @@ def get_flac_converter():
flac_converter = os.path.join(base_path, "flac-linux-x86_64")
else: # no FLAC converter available
raise OSError(
"FLAC conversion utility not available - consider installing the FLAC command line application by running `apt-get install flac` or your operating system's equivalent"
'FLAC conversion utility not available - consider installing the FLAC command \
line application by running `apt-get install flac` or your operating system\'s \
equivalent'
)

# mark FLAC converter as executable if possible
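The 24-bit workaround in `get_raw_data` above converts audio to 32-bit first, then discards the least significant byte of each little-endian 32-bit sample. In isolation, the slicing the commit reformats looks like this:

```python
def downconvert_32_to_24(raw_data: bytes) -> bytes:
    """Drop the low byte of each little-endian 32-bit sample, leaving
    24-bit samples (the workaround for bugs.python.org/issue12866)."""
    return b"".join(
        raw_data[i + 1:i + 4]           # keep the 3 most significant bytes
        for i in range(0, len(raw_data), 4)
    )

# two 32-bit samples in, two 24-bit samples out
samples = bytes([0x01, 0x02, 0x03, 0x04,
                 0x05, 0x06, 0x07, 0x08])
print(downconvert_32_to_24(samples))  # b'\x02\x03\x04\x06\x07\x08'
```

Because the data is little endian, byte 0 of each 4-byte group is the least significant one, so skipping it truncates precision without changing the sample's magnitude ordering.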
12 changes: 6 additions & 6 deletions custom_speech_recognition/recognizers/whisper.py
@@ -15,13 +15,15 @@ def recognize_whisper_api(
api_key: str | None = None,
):
"""
Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the OpenAI Whisper API.
Perform speech recognition on ``audio_data`` (``AudioData`` instance), using OpenAI Whisper API.

This function requires an OpenAI account; visit https://platform.openai.com/signup, then generate API Key in `User settings <https://platform.openai.com/account/api-keys>`__.
This function requires an OpenAI account; visit https://platform.openai.com/signup, then generate
API Key in `User settings <https://platform.openai.com/account/api-keys>`__.

Detail: https://platform.openai.com/docs/guides/speech-to-text

Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the openai installation, or the environment variable is missing.
Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the
openai installation, or the environment variable is missing.
"""
if not isinstance(audio_data, AudioData):
raise ValueError("``audio_data`` must be an ``AudioData`` instance")
@@ -31,9 +33,7 @@
try:
import openai
except ImportError:
raise SetupError(
"missing openai module: ensure that openai is set up correctly."
)
raise SetupError("missing openai module: ensure that openai is set up correctly.")

wav_data = BytesIO(audio_data.get_wav_data())
wav_data.name = "SpeechRecognition_audio.wav"