Skip to content

Commit

Permalink
Improve language detection when using clip_timestamps
Browse files (browse the repository at this point in the history)
  • Loading branch information
ben91lin committed Jun 6, 2024
1 parent 65551c0 commit b8cc0fc
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions faster_whisper/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,16 +370,25 @@ def transcribe(
      or language_detection_segments < 1
  ):
      language_detection_segments = 1
- seek = 0
+ if isinstance(clip_timestamps, str):
+     start_timestamp = float(clip_timestamps.split(",")[0])
+ else:
+     start_timestamp = clip_timestamps[0]
  detected_language_info = {}
+ seek = int(start_timestamp * self.frames_per_second)
  content_frames = (
      features.shape[-1] - self.feature_extractor.nb_max_frames
  )
- while (
-     seek <= content_frames
-     and seek
-     < self.feature_extractor.nb_max_frames * language_detection_segments
- ):
+ # If seek is beyond the full frames, set it to the last segment
+ if seek >= features.shape[-1]:
+     seek = content_frames
+ end_frames = min(
+     seek
+     + self.feature_extractor.nb_max_frames
+     * language_detection_segments,
+     features.shape[-1],
+ )
+ while seek < end_frames:
      segment = features[
          :, seek : seek + self.feature_extractor.nb_max_frames
      ]
Expand Down

0 comments on commit b8cc0fc

Please sign in to comment.