v0.13 - gputil best gpu choice mapping
FlyingFathead committed May 28, 2024
1 parent e03f854 commit 1efe523
Showing 4 changed files with 53 additions and 15 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
# whisper-transcriber-telegram-bot

A Python-based Whisper AI transcriber bot for Telegram.
A Python-based, locally run (GPU/CPU) Whisper AI transcriber bot for Telegram.

## About

This is a Whisper AI-based transcriber bot for Telegram, running on Python and designed to transcribe audio from various media sources supported by `yt-dlp`. While initially focused on YouTube, the bot now supports a broad range of sites listed [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md), leveraging a locally run OpenAI Whisper model to process audio and return the transcription in multiple formats.


## Features

- Processes media URLs from a variety of sources supported by `yt-dlp`.
- Downloads audio using `yt-dlp` from supported sites including but not limited to YouTube.
- Uses a local model from the `openai-whisper` package for transcription.
- Automatically uses `GPUtil` to map out the best available CUDA-enabled GPU.
- Transcribes audio using OpenAI's Whisper model (see [openai/whisper](https://github.com/openai/whisper/) for more info).
- Returns transcription in text, SRT, and VTT formats.
- Handles concurrent transcription requests efficiently.
- Handles concurrent transcription requests efficiently with async & task queuing.

## Installation

@@ -53,6 +53,7 @@ After launching the bot, you can interact with it via Telegram:

## Changes

- v0.13 - added `GPUtil` GPU mapping to figure out the best available CUDA GPU instance to use (most free VRAM)
- v0.12 - async handling & user model change fixes, improved error handling
- v0.11.1 - bot logic + layout changes, model list with `/model` (also in `config.ini`)
- v0.11 - bugfixes & rate limits for `/model` command changes for users
3 changes: 2 additions & 1 deletion requirements.txt
@@ -1,3 +1,4 @@
python-telegram-bot
yt-dlp
openai-whisper
gputil
2 changes: 1 addition & 1 deletion src/main.py
@@ -3,7 +3,7 @@
# openai-whisper transcriber-bot for Telegram

# version of this program
version_number = "0.12"
version_number = "0.13"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/
56 changes: 46 additions & 10 deletions src/transcription_handler.py
@@ -6,6 +6,7 @@
# https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

import GPUtil
import sys
import time
import logging
@@ -214,11 +215,15 @@ def log_stderr(line):
logger.error(f"Whisper stderr: {line.strip()}")

# transcription logic with header inclusion based on settings
async def transcribe_audio(audio_path, output_dir, youtube_url, video_info_message, include_header, model):
# (always tries to use the available GPU with the most free VRAM)
async def transcribe_audio(audio_path, output_dir, youtube_url, video_info_message, include_header, model, device):
log_gpu_utilization() # Log GPU utilization before starting transcription

logger.info(f"Using device: {device} for transcription")

logger.info(f"Starting transcription with model '{model}' for: {audio_path}")

transcription_command = ["whisper", audio_path, "--model", model, "--output_dir", output_dir]
transcription_command = ["whisper", audio_path, "--model", model, "--output_dir", output_dir, "--device", device]

# Start the subprocess and get stdout, stderr streams
process = await asyncio.create_subprocess_exec(
@@ -274,10 +279,9 @@ async def transcribe_audio(audio_path, output_dir, youtube_url, video_info_messa
return created_files
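The transcription above runs Whisper as an async subprocess so stderr can be streamed to the logger line by line while the bot stays responsive. A minimal, self-contained sketch of that pattern follows; the echoed command is a stand-in for the real `whisper` invocation, and `run_and_capture_stderr` is a hypothetical helper name, not part of the bot:

```python
import asyncio
import sys

async def run_and_capture_stderr(cmd):
    # Launch the command with piped streams, as the bot does for whisper
    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    lines = []
    # Read stderr incrementally so long-running jobs can be logged live
    while True:
        raw = await process.stderr.readline()
        if not raw:
            break
        lines.append(raw.decode().rstrip())
    await process.wait()
    return process.returncode, lines

if __name__ == "__main__":
    rc, err_lines = asyncio.run(run_and_capture_stderr(
        [sys.executable, "-c", "import sys; print('progress 50%', file=sys.stderr)"]
    ))
    print(rc, err_lines)
```

The same loop shape works for whisper's progress output, which it writes to stderr.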

# Process the message's URL and keep the user informed
# (added the new GPU logging function call to process_url_message)
async def process_url_message(message_text, bot, update, model):

try:

# Get general settings right at the beginning of the function
settings = get_general_settings()

@@ -294,7 +298,6 @@ async def process_url_message(message_text, bot, update, model):
urls = re.findall(r'(https?://\S+)', message_text)

for url in urls:

# Normalize the YouTube URL to strip off any unnecessary parameters
# normalized_url = normalize_youtube_url(url)

@@ -387,12 +390,25 @@

await bot.send_message(chat_id=update.effective_chat.id, text=detailed_message)

# Get the best GPU for transcription
best_gpu = get_best_gpu()
if best_gpu:
    device = f'cuda:{best_gpu.id}'
    gpu_message = (
        f"Using GPU {best_gpu.id}: {best_gpu.name}\n"
        f"Free Memory: {best_gpu.memoryFree} MB\n"
        f"Load: {best_gpu.load * 100:.1f}%"
    )
else:
    device = 'cpu'
    gpu_message = "No GPU available, using CPU for transcription."

# Log and send the GPU information to the user
logger.info(gpu_message)
await bot.send_message(chat_id=update.effective_chat.id, text=gpu_message)

# Transcribe the audio and handle transcription output
model = get_whisper_model(user_id)  # Ensure you fetch the current model setting
if not model:
    logger.error("Failed to retrieve the transcription model.")
    return
transcription_paths = await transcribe_audio(audio_path, output_dir, normalized_url, video_info_message, include_header, model)
transcription_paths = await transcribe_audio(audio_path, output_dir, normalized_url, video_info_message, include_header, model, device)

if not transcription_paths:
    # Notify if transcription fails
@@ -596,3 +612,23 @@ def estimate_transcription_time(model, audio_duration):
relative_speed = model_speeds.get(model, 1) # Default to 1 if model not found
estimated_time = baseline_time / relative_speed
return estimated_time
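The estimate above depends on a `baseline_time` computed earlier in the function (not shown in this hunk). A self-contained sketch of the whole idea, with illustrative speed factors and an assumed baseline multiplier that are not the bot's real values:

```python
# Assumed relative speeds vs. the 'large' model (illustrative numbers only)
model_speeds = {"tiny": 32, "base": 16, "small": 6, "medium": 2, "large": 1}

def estimate_transcription_time(model, audio_duration):
    # Assumption for this sketch: 'large' transcribes at ~2x audio duration
    baseline_time = audio_duration * 2
    relative_speed = model_speeds.get(model, 1)  # default to 1 if model not found
    return baseline_time / relative_speed

# A 10-minute (600 s) clip: 'large' -> 1200 s, 'base' -> 75 s
```

Faster models divide the baseline, so the estimate shrinks proportionally to the speed factor.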

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# get the best GPU availability
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Function to get GPU utilization and select the GPU with the most free memory
def get_best_gpu():
    gpus = GPUtil.getGPUs()
    if not gpus:
        logger.error("No GPUs found")
        # Return None (not the string 'cpu'): a non-empty string is truthy,
        # so the caller's `if best_gpu:` check would build device 'cuda:cpu'
        return None

    best_gpu = max(gpus, key=lambda gpu: gpu.memoryFree)
    return best_gpu if best_gpu.memoryFree > 0 else None

# Add a new function to log GPU utilization details
def log_gpu_utilization():
    gpus = GPUtil.getGPUs()
    for gpu in gpus:
        logger.info(
            f"GPU {gpu.id}: {gpu.name}, Load: {gpu.load * 100:.1f}%, "
            f"Free Memory: {gpu.memoryFree} MB, Used Memory: {gpu.memoryUsed} MB, "
            f"Total Memory: {gpu.memoryTotal} MB"
        )
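The "most free VRAM wins" selection in `get_best_gpu()` can be exercised without an NVIDIA driver by mocking the attributes GPUtil exposes on its GPU objects. A minimal sketch under that assumption (`FakeGPU` and `pick_best_gpu` are stand-in names, not part of the bot or GPUtil):

```python
from dataclasses import dataclass

@dataclass
class FakeGPU:
    # Stand-in for GPUtil's GPU object; only the attributes used above
    id: int
    name: str
    memoryFree: float  # MB, the unit GPUtil reports
    load: float        # 0.0-1.0, as in GPUtil

def pick_best_gpu(gpus):
    # Mirrors get_best_gpu(): None means the caller falls back to 'cpu'
    if not gpus:
        return None
    best = max(gpus, key=lambda gpu: gpu.memoryFree)
    return best if best.memoryFree > 0 else None

gpus = [FakeGPU(0, "RTX 3090", 1024.0, 0.9), FakeGPU(1, "RTX 3060", 8192.0, 0.1)]
best = pick_best_gpu(gpus)
device = f"cuda:{best.id}" if best else "cpu"
```

Note the selection is by free memory alone, so a smaller but idle card can win over a larger, busy one, as in this example.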
