v0.14.2 - display duration & estimates

FlyingFathead · Jun 1, 2024 · 560e7cd
1 parent 89861e0
commit 560e7cd
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -58,6 +58,7 @@ After launching the bot, you can interact with it via Telegram:
 
 ## Changes
 
+- v0.14.2 - display duration & estimates
 - v0.14.1 - small fixes to the file handler; more detailed exception catching
 - v0.14 - now handles both Telegram's audio messages as well as audio files (.wav, .mp3)
 - v0.13 - added `GPUtil` GPU mapping to figure out the best available CUDA GPU instance to use

diff --git a/src/main.py b/src/main.py
@@ -3,7 +3,7 @@
 # openai-whisper transcriber-bot for Telegram
 
 # version of this program
-version_number = "0.14.1"
+version_number = "0.14.2"
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/
@@ -25,7 +25,7 @@
 from telegram.ext import CommandHandler
 
 # Adjust import paths based on new structure
-from transcription_handler import process_url_message, set_user_model, get_whisper_model, transcribe_audio, get_best_gpu, get_audio_duration, estimate_transcription_time
+from transcription_handler import process_url_message, set_user_model, get_whisper_model, transcribe_audio, get_best_gpu, get_audio_duration, estimate_transcription_time, format_duration
 from utils.bot_token import get_bot_token
 from utils.utils import print_startup_message
 
@@ -156,13 +156,15 @@ async def process_queue(self):
 
                     # Calculate estimated finish time
                     current_time = datetime.now()
-                    estimated_finish_time = current_time + timedelta(minutes=estimated_minutes)
+                    estimated_finish_time = current_time + timedelta(seconds=estimated_time)
 
                     # Format messages for start and estimated finish time
                     time_now_str = current_time.strftime('%Y-%m-%d %H:%M:%S')
                     estimated_finish_time_str = estimated_finish_time.strftime('%Y-%m-%d %H:%M:%S')
 
+                    formatted_audio_duration = format_duration(audio_duration)
                     detailed_message = (
+                        f"Audio file length:\n{formatted_audio_duration}\n\n"
                         f"Whisper model in use:\n{model}\n\n"                
                         f"Estimated transcription time:\n{estimated_minutes:.1f} minutes.\n\n"
                         f"Time now:\n{time_now_str}\n\n"
@@ -197,7 +199,6 @@ async def process_queue(self):
                 self.task_queue.task_done()
             logger.info(f"Task completed for user ID {user_id}: {task}")
 
-
     async def shutdown(self, signal, loop):
         """Cleanup tasks tied to the service's shutdown."""
         logger.info(f"Received exit signal {signal.name}...")

diff --git a/src/transcription_handler.py b/src/transcription_handler.py
@@ -486,9 +486,11 @@ def format_duration(duration):
     hours, remainder = divmod(duration, 3600)
     minutes, seconds = divmod(remainder, 60)
     if hours:
-        return f"{hours}h {minutes}m {seconds}s"
+        return f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
+    elif minutes:
+        return f"{int(minutes)}m {int(seconds)}s"
     else:
-        return f"{minutes}m {seconds}s"
+        return f"{int(seconds)}s"
 
 # Fetch details for videos
 async def fetch_video_details(url, max_retries=3, base_delay=5, command_timeout=30):
@@ -630,12 +632,36 @@ def estimate_transcription_time(model, audio_duration):
     :param audio_duration: The duration of the audio in seconds.
     :return: Estimated time in seconds to transcribe the audio.
     """
+    # Ensure audio_duration is not None and is greater than 0
+    if audio_duration is None or audio_duration <= 0:
+        logger.error(f"Invalid audio duration: {audio_duration}")
+        return 0
+
+    logger.info(f"Estimating transcription time for model: {model} and audio duration: {audio_duration} seconds")
+
     # Assume the 'large' model takes as long to transcribe as the audio itself lasts.
     # Scale other models based on their relative speed.
     baseline_time = audio_duration  # This is for the 'large' model as a baseline
     relative_speed = model_speeds.get(model, 1)  # Default to 1 if model not found
     estimated_time = baseline_time / relative_speed
-    return estimated_time
+
+    logger.info(f"Estimated transcription time: {estimated_time} seconds")
+    return max(estimated_time, 60)  # Ensure at least 1 minute is shown
+
+# def estimate_transcription_time(model, audio_duration):
+#     """
+#     Estimate the transcription time based on the model size and audio duration.
+
+#     :param model: The model size used for transcription.
+#     :param audio_duration: The duration of the audio in seconds.
+#     :return: Estimated time in seconds to transcribe the audio.
+#     """
+#     # Assume 'large' model takes its duration equal to the audio's duration to transcribe.
+#     # Scale other models based on their relative speed.
+#     baseline_time = audio_duration  # This is for the 'large' model as a baseline
+#     relative_speed = model_speeds.get(model, 1)  # Default to 1 if model not found
+#     estimated_time = baseline_time / relative_speed
+#     return estimated_time
 
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # get the best GPU availability