Skip to content

Commit

Permalink
print '?' if a letter can't be encoded using the system default encoding (openai#859)
Browse files Browse the repository at this point in the history
  • Loading branch information
jongwook authored and abyesilyurt committed Nov 13, 2023
1 parent 700362b commit 8f94192
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion whisper/transcribe.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import os
import sys
import warnings
from typing import List, Optional, Tuple, Union, TYPE_CHECKING

Expand Down Expand Up @@ -167,7 +168,10 @@ def add_segment(
)

if verbose:
print(f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}")
line = f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}\n"
# Compared to plain `print(line)`, this replaces any character that cannot be represented in
# the system default encoding with a '?', avoiding a UnicodeEncodeError.
sys.stderr.buffer.write(line.encode(sys.getdefaultencoding(), errors="replace"))

# show the progress bar when verbose is False (otherwise the transcribed text will be printed)
num_frames = mel.shape[-1]
Expand Down

0 comments on commit 8f94192

Please sign in to comment.