Skip to content

Commit

Permalink
Update decoding.py
Browse files · Browse the repository at this point in the history
Changes from openai/whisper#914
  • Loading branch information
FernanOrtega committed Mar 24, 2023
1 parent d1b4ff8 commit 33dd3b9
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion whisperx/decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,8 @@ def apply(self, logits: Tensor, tokens: Tensor):

# timestamps have to appear in pairs, except directly before EOT; mask logits accordingly
for k in range(tokens.shape[0]):
seq = [t for t in tokens[k, self.sample_begin :].tolist()]
sampled_tokens = tokens[k, self.sample_begin :]
seq = [t for t in sampled_tokens.tolist()]
last_was_timestamp = len(seq) >= 1 and seq[-1] >= self.tokenizer.timestamp_begin
penultimate_was_timestamp = len(seq) < 2 or seq[-2] >= self.tokenizer.timestamp_begin

Expand All @@ -422,6 +423,11 @@ def apply(self, logits: Tensor, tokens: Tensor):
logits[k, self.tokenizer.timestamp_begin :] = -np.inf
else: # cannot be normal text tokens
logits[k, : self.tokenizer.eot] = -np.inf

timestamps = sampled_tokens[sampled_tokens.ge(self.tokenizer.timestamp_begin)]
if timestamps.numel() > 0:
# timestamps shouldn't decrease; forbid timestamp tokens smaller than the last
logits[k, self.tokenizer.timestamp_begin : timestamps[-1]] = -np.inf

if tokens.shape[1] == self.sample_begin:
# suppress generating non-timestamp tokens at the beginning
Expand Down

0 comments on commit 33dd3b9

Please sign in to comment.