Remove assert statements from non-test files

HHousen · HHousen · Jul 7, 2021 · Jul 7, 2021 · Jul 7, 2021 · 6b00fc03205de3f5aa2fa05e7100e2de5c48cc7b
commit 6b00fc03205de3f5aa2fa05e7100e2de5c48cc7b
diff --git a/lecture2notes/end_to_end/corner_crop_transform.py b/lecture2notes/end_to_end/corner_crop_transform.py
@@ -494,7 +494,8 @@ def crop(
  Returns:
  [tuple]: path to cropped image and failed (True if no slide bounding box found, false otherwise)
  """
- assert mode in ["automatic", "contours", "hough_lines"]
+ if mode not in ["automatic", "contours", "hough_lines"]:
+ raise AssertionError
 
  if not debug_output_imgs:
  debug_output_imgs = None

diff --git a/lecture2notes/end_to_end/summarization_approaches.py b/lecture2notes/end_to_end/summarization_approaches.py
@@ -105,7 +105,8 @@ def compute_ranks(sigma, v_matrix):
  MIN_DIMENSIONS = 3
  REDUCTION_RATIO = 1 / 1
 
- assert len(sigma) == v_matrix.shape[0], "Matrices should be multiplicable"
+ if len(sigma) != v_matrix.shape[0]:
+ raise AssertionError("Matrices should be multiplicable")
 
  dimensions = max(MIN_DIMENSIONS, int(len(sigma) * REDUCTION_RATIO))
  powered_sigma = tuple(
@@ -133,7 +134,8 @@ def get_best_sentences(sentences, count, rating, *args, **kwargs):
  )
  rate = rating
  if isinstance(rating, list):
- assert not args and not kwargs
+ if not (not args and not kwargs):
+ raise AssertionError
  rate = lambda o: rating[o] # noqa: E731
 
  infos = (
@@ -453,14 +455,17 @@ def cluster(
  Returns:
  [str]: The summarized text as a normal string. Line breaks will be included if ``title_generation`` is true.
  """
- assert cluster_summarizer in ["extractive", "abstractive"]
- assert feature_extraction in ["neural_hf", "neural_sbert", "spacy", "bow"]
+ if cluster_summarizer not in ["extractive", "abstractive"]:
+ raise AssertionError
+ if feature_extraction not in ["neural_hf", "neural_sbert", "spacy", "bow"]:
+ raise AssertionError
  if (cluster_summarizer == "extractive") and (feature_extraction != "bow"):
  raise Exception(
  "If cluster_summarizer is set to 'extractive', feature_extraction cannot be set to 'bow' because extractive summarization is based off the ranks calculated from the document-term matrix used for 'bow' feature extraction."
  )
  if final_sort_by:
- assert final_sort_by in ["order", "rating"]
+ if final_sort_by not in ["order", "rating"]:
+ raise AssertionError
 
  if title_generation: # if final_sort_by and title_generation
  raise Exception(
@@ -876,11 +881,12 @@ def structured_joined_sum(
  If ``to_json`` is a path (string), then the JSON data will be dumped to the file specified
  and the path to the file will be returned.
  """
- assert summarization_method in [
+ if summarization_method not in [
  "abstractive",
  "extractive",
  "none",
- ], "Invalid summarization method"
+ ]:
+ raise AssertionError("Invalid summarization method")
 
  first_slide_frame_num = int(first_slide_frame_num)
 

diff --git a/lecture2notes/end_to_end/transcribe/mic_vad_streaming.py b/lecture2notes/end_to_end/transcribe/mic_vad_streaming.py
@@ -103,7 +103,8 @@ def write_wav(self, filename, data):
  wf = wave.open(filename, "wb")
  wf.setnchannels(self.CHANNELS)
  # wf.setsampwidth(self.pa.get_sample_size(FORMAT))
- assert self.FORMAT == pyaudio.paInt16
+ if self.FORMAT != pyaudio.paInt16:
+ raise AssertionError
  wf.setsampwidth(2)
  wf.setframerate(self.sample_rate)
  wf.writeframes(data)

diff --git a/lecture2notes/end_to_end/transcribe/transcribe_main.py b/lecture2notes/end_to_end/transcribe/transcribe_main.py
@@ -72,7 +72,8 @@ def transcribe_audio_generic(audio_path, method="sphinx", **kwargs):
  Returns:
  str: the transcript of the audio file
  """
- assert method in ["sphinx", "google"]
+ if method not in ["sphinx", "google"]:
+ raise AssertionError
  transcript = None
  logger.debug("Initializing speech_recognition library")
  r = sr.Recognizer()
@@ -245,7 +246,8 @@ def read_wave(path, desired_sample_rate=None, force=False):
  """
  with contextlib.closing(wave.open(str(path), "rb")) as wf:
  sample_width = wf.getsampwidth()
- assert sample_width == 2
+ if sample_width != 2:
+ raise AssertionError
  sample_rate = wf.getframerate()
  frames = wf.getnframes()
  duration = frames / sample_rate
@@ -608,12 +610,13 @@ def chunk_by_speech(
  tuple: (segments, sample_rate, audio_length). See :meth:`~lecture2notes.end_to_end.transcribe.webrtcvad_utils.vad_segment_generator`.
  """
  if desired_sample_rate:
- assert desired_sample_rate in (
+ if desired_sample_rate not in (
  8000,
  16000,
  32000,
  48000,
- ), "The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at 8000, 16000, 32000 or 48000 Hz."
+ ):
+ raise AssertionError("The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at 8000, 16000, 32000 or 48000 Hz.")
 
  segments, sample_rate, audio_length = webrtcvad_utils.vad_segment_generator(
  audio_path,
@@ -781,7 +784,8 @@ def process_chunks(chunk_dir, method="sphinx", model_dir=None):
  if chunk.endswith(".wav"):
  chunk_path = Path(chunk_dir) / chunk
  if method == "deepspeech" or method == "vosk":
- assert model_dir is not None
+ if model_dir is None:
+ raise AssertionError
  model = load_model(method, model_dir)
  transcript, transcript_json = transcribe_audio(
  chunk_path, method, model=model
@@ -805,7 +809,8 @@ def caption_file_to_string(transcript_path, remove_speakers=False):
  Optionally removes speaker entries by removing everything before ": " in each subtitle cell.
  """
  transcript_path = Path(transcript_path)
- assert transcript_path.is_file()
+ if not transcript_path.is_file():
+ raise AssertionError
  if transcript_path.suffix == ".srt":
  subtitles = webvtt.from_srt(transcript_path)
  elif transcript_path.suffix == ".sbv":

diff --git a/lecture2notes/end_to_end/transcribe/webrtcvad_utils.py b/lecture2notes/end_to_end/transcribe/webrtcvad_utils.py
@@ -131,12 +131,13 @@ def vad_segment_generator(wavFile, aggressiveness, desired_sample_rate=None):
 
  logging.debug("Caught the wav file @: %s" % (wavFile))
  audio, sample_rate, audio_length = read_wave(wavFile, desired_sample_rate)
- assert sample_rate in (
+ if sample_rate not in (
  8000,
  16000,
  32000,
  48000,
- ), "The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at 8000, 16000, 32000 or 48000 Hz."
+ ):
+ raise AssertionError("The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at 8000, 16000, 32000 or 48000 Hz.")
  vad = webrtcvad.Vad(int(aggressiveness))
  frames = frame_generator(30, audio, sample_rate)
  frames = list(frames)

diff --git a/lecture2notes/models/slide_classifier/class_cluster_scikit.py b/lecture2notes/models/slide_classifier/class_cluster_scikit.py
@@ -31,7 +31,8 @@ def __init__(
  self.vectors = OrderedDict()
 
  algorithms = ["kmeans", "affinity_propagation"]
- assert algorithm_name in algorithms
+ if algorithm_name not in algorithms:
+ raise AssertionError
 
  self.algorithm_name = algorithm_name
  self.centroids = None
@@ -186,7 +187,8 @@ def calculate_best_k(self, max_k=50):
  """
  # Elbow method: https://www.geeksforgeeks.org/elbow-method-for-optimal-value-of-k-in-kmeans/
  # Other methods: https://en.wikipedia.org/wiki/Determining_the_number_of_clusters_in_a_data_set
- assert self.algorithm_name == "kmeans"
+ if self.algorithm_name != "kmeans":
+ raise AssertionError
  costs = []
  for i in range(1, max_k):
  kmeans, _, cost, _ = self.create_kmeans(num_centroids=i, store=False)