Skip to content

Commit

Permalink
v0.14.2 - display duration & estimates
Browse files Browse the repository at this point in the history
  • Loading branch information
FlyingFathead committed Jun 1, 2024
1 parent 89861e0 commit 560e7cd
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ After launching the bot, you can interact with it via Telegram:

## Changes

- v0.14.2 - display duration & estimates
- v0.14.1 - small fixes to the file handler; more detailed exception catching
- v0.14 - now handles both Telegram's audio messages as well as audio files (.wav, .mp3)
- v0.13 - added `GPUtil` GPU mapping to figure out the best available CUDA GPU instance to use
Expand Down
9 changes: 5 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# openai-whisper transcriber-bot for Telegram

# version of this program
version_number = "0.14.1"
version_number = "0.14.2"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/
Expand All @@ -25,7 +25,7 @@
from telegram.ext import CommandHandler

# Adjust import paths based on new structure
from transcription_handler import process_url_message, set_user_model, get_whisper_model, transcribe_audio, get_best_gpu, get_audio_duration, estimate_transcription_time
from transcription_handler import process_url_message, set_user_model, get_whisper_model, transcribe_audio, get_best_gpu, get_audio_duration, estimate_transcription_time, format_duration
from utils.bot_token import get_bot_token
from utils.utils import print_startup_message

Expand Down Expand Up @@ -156,13 +156,15 @@ async def process_queue(self):

# Calculate estimated finish time
current_time = datetime.now()
estimated_finish_time = current_time + timedelta(minutes=estimated_minutes)
estimated_finish_time = current_time + timedelta(seconds=estimated_time)

# Format messages for start and estimated finish time
time_now_str = current_time.strftime('%Y-%m-%d %H:%M:%S')
estimated_finish_time_str = estimated_finish_time.strftime('%Y-%m-%d %H:%M:%S')

formatted_audio_duration = format_duration(audio_duration)
detailed_message = (
f"Audio file length:\n{formatted_audio_duration}\n\n"
f"Whisper model in use:\n{model}\n\n"
f"Estimated transcription time:\n{estimated_minutes:.1f} minutes.\n\n"
f"Time now:\n{time_now_str}\n\n"
Expand Down Expand Up @@ -197,7 +199,6 @@ async def process_queue(self):
self.task_queue.task_done()
logger.info(f"Task completed for user ID {user_id}: {task}")


async def shutdown(self, signal, loop):
"""Cleanup tasks tied to the service's shutdown."""
logger.info(f"Received exit signal {signal.name}...")
Expand Down
32 changes: 29 additions & 3 deletions src/transcription_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,9 +486,11 @@ def format_duration(duration):
hours, remainder = divmod(duration, 3600)
minutes, seconds = divmod(remainder, 60)
if hours:
return f"{hours}h {minutes}m {seconds}s"
return f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
elif minutes:
return f"{int(minutes)}m {int(seconds)}s"
else:
return f"{minutes}m {seconds}s"
return f"{int(seconds)}s"

# Fetch details for videos
async def fetch_video_details(url, max_retries=3, base_delay=5, command_timeout=30):
Expand Down Expand Up @@ -630,12 +632,36 @@ def estimate_transcription_time(model, audio_duration):
:param audio_duration: The duration of the audio in seconds.
:return: Estimated time in seconds to transcribe the audio.
"""
# Ensure audio_duration is not None and is greater than 0
if audio_duration is None or audio_duration <= 0:
logger.error(f"Invalid audio duration: {audio_duration}")
return 0

logger.info(f"Estimating transcription time for model: {model} and audio duration: {audio_duration} seconds")

# Assume the 'large' model takes as long to transcribe the audio as the audio's own duration.
# Scale other models based on their relative speed.
baseline_time = audio_duration # This is for the 'large' model as a baseline
relative_speed = model_speeds.get(model, 1) # Default to 1 if model not found
estimated_time = baseline_time / relative_speed
return estimated_time

logger.info(f"Estimated transcription time: {estimated_time} seconds")
return max(estimated_time, 60) # Ensure at least 1 minute is shown

# def estimate_transcription_time(model, audio_duration):
# """
# Estimate the transcription time based on the model size and audio duration.

# :param model: The model size used for transcription.
# :param audio_duration: The duration of the audio in seconds.
# :return: Estimated time in seconds to transcribe the audio.
# """
# # Assume 'large' model takes its duration equal to the audio's duration to transcribe.
# # Scale other models based on their relative speed.
# baseline_time = audio_duration # This is for the 'large' model as a baseline
# relative_speed = model_speeds.get(model, 1) # Default to 1 if model not found
# estimated_time = baseline_time / relative_speed
# return estimated_time

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# get the best GPU availability
Expand Down

0 comments on commit 560e7cd

Please sign in to comment.