Merge pull request BerriAI#4724 from BerriAI/litellm_Set_max_file_size_transc

[Feat] - set max file size on /audio/transcriptions
ishaan-jaff committed Jul 16, 2024
2 parents af19a2a + e1c1149 commit 254ac37
Showing 8 changed files with 144 additions and 5 deletions.
1 change: 1 addition & 0 deletions docs/my-website/docs/enterprise.md
@@ -27,6 +27,7 @@ This covers:
- ✅ IP address‑based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests)
- **Spend Tracking**
- ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags)
1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/enterprise.md
@@ -21,6 +21,7 @@ Features:
- ✅ IP address‑based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
- **Spend Tracking**
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
29 changes: 29 additions & 0 deletions docs/my-website/docs/text_to_speech.md
@@ -109,4 +109,33 @@ response = speech(
input="the quick brown fox jumped over the lazy dogs",
)
response.stream_to_file(speech_file_path)
```

## ✨ Enterprise LiteLLM Proxy - Set Max Request File Size

Use this when you want to limit the file size for requests sent to `/audio/transcriptions`.

```yaml
- model_name: whisper
litellm_params:
model: whisper-1
api_key: sk-*******
max_file_size_mb: 0.00001 # 👈 max file size in MB (Set this intentionally very small for testing)
model_info:
mode: audio_transcription
```

Make a test request with a valid file:
```shell
curl --location 'http://localhost:4000/v1/audio/transcriptions' \
--header 'Authorization: Bearer sk-1234' \
--form 'file=@"/Users/ishaanjaffer/Github/litellm/tests/gettysburg.wav"' \
--form 'model="whisper"'
```


Expect to see the following response:

```json
{"error":{"message":"File size is too large. Please check your file size. Passed file size: 0.7392807006835938 MB. Max file size: 0.0001 MB","type":"bad_request","param":"file","code":500}}
```
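
For readers who prefer the OpenAI Python SDK over `curl`, here is a minimal sketch of the same request that also catches the rejection. It is an editor's illustration, not part of this PR: the proxy URL, key, model alias, and audio filename are assumptions carried over from the examples above.

```python
# Sketch: the same transcription request via the OpenAI SDK, pointed at the proxy.
# Assumes a proxy on localhost:4000, key sk-1234, and a local gettysburg.wav.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

try:
    with open("gettysburg.wav", "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper",
            file=audio_file,
        )
    print(transcript.text)
except openai.BadRequestError as e:
    # The proxy returns HTTP 400 when the upload exceeds max_file_size_mb
    print("Rejected:", e)
```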
74 changes: 71 additions & 3 deletions litellm/proxy/common_utils/http_parsing_utils.py
@@ -1,6 +1,11 @@
-from typing import Optional
-from fastapi import Request
-import ast, json
+import ast
+import json
+from typing import List, Optional
+
+from fastapi import Request, UploadFile, status
+
+from litellm._logging import verbose_proxy_logger
+from litellm.types.router import Deployment

async def _read_request_body(request: Optional[Request]) -> dict:
@@ -29,3 +34,66 @@ async def _read_request_body(request: Optional[Request]) -> dict:
return request_data
except:
return {}


def check_file_size_under_limit(
request_data: dict,
file: UploadFile,
router_model_names: List[str],
) -> bool:
"""
    Check if any files passed in the request are under max_file_size_mb
Returns True -> when file size is under max_file_size_mb limit
Raises ProxyException -> when file size is over max_file_size_mb limit or not a premium_user
"""
from litellm.proxy.proxy_server import (
CommonProxyErrors,
ProxyException,
llm_router,
premium_user,
)

    file_contents_size = file.size or 0
    file_content_size_in_mb = file_contents_size / (1024 * 1024)
    max_file_size_mb: Optional[float] = None  # remains None unless the matched deployment sets a limit

if llm_router is not None and request_data["model"] in router_model_names:
try:
deployment: Optional[Deployment] = (
llm_router.get_deployment_by_model_group_name(
model_group_name=request_data["model"]
)
)
if (
deployment
and deployment.litellm_params is not None
and deployment.litellm_params.max_file_size_mb is not None
):
max_file_size_mb = deployment.litellm_params.max_file_size_mb
except Exception as e:
verbose_proxy_logger.error(
"Got error when checking file size: %s", (str(e))
)

if max_file_size_mb is not None:
verbose_proxy_logger.debug(
"Checking file size, file content size=%s, max_file_size_mb=%s",
file_content_size_in_mb,
max_file_size_mb,
)
if not premium_user:
raise ProxyException(
message=f"Tried setting max_file_size_mb for /audio/transcriptions. {CommonProxyErrors.not_premium_user.value}",
code=status.HTTP_400_BAD_REQUEST,
type="bad_request",
param="file",
)
if file_content_size_in_mb > max_file_size_mb:
raise ProxyException(
message=f"File size is too large. Please check your file size. Passed file size: {file_content_size_in_mb} MB. Max file size: {max_file_size_mb} MB",
code=status.HTTP_400_BAD_REQUEST,
type="bad_request",
param="file",
)

return True
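
Editor's aside: the check above boils down to a bytes-to-megabytes conversion followed by a comparison. A dependency-free sketch of just that arithmetic (`is_under_limit` is a hypothetical name, not a LiteLLM function):

```python
# Standalone sketch of the size math in check_file_size_under_limit.
def is_under_limit(file_size_bytes: int, max_file_size_mb: float) -> bool:
    size_in_mb = file_size_bytes / (1024 * 1024)  # same conversion as above
    return size_in_mb <= max_file_size_mb

# A ~0.74 MB file, as in the docs example above:
print(is_under_limit(775_168, max_file_size_mb=0.0001))  # False -> rejected
print(is_under_limit(775_168, max_file_size_mb=1000.0))  # True  -> accepted
```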
7 changes: 7 additions & 0 deletions litellm/proxy/proxy_config.yaml
@@ -7,6 +7,13 @@ model_list:
- model_name: gemini-flash
litellm_params:
model: gemini/gemini-1.5-flash
- model_name: whisper
litellm_params:
model: whisper-1
api_key: sk-*******
max_file_size_mb: 1000
model_info:
mode: audio_transcription

general_settings:
master_key: sk-1234
13 changes: 11 additions & 2 deletions litellm/proxy/proxy_server.py
@@ -143,7 +143,10 @@ def generate_feedback_box():
decrypt_value_helper,
encrypt_value_helper,
)
-from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+from litellm.proxy.common_utils.http_parsing_utils import (
+    _read_request_body,
+    check_file_size_under_limit,
+)
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.openai_endpoint_utils import (
remove_sensitive_info_from_deployment,
@@ -3796,7 +3799,13 @@ async def audio_transcriptions(
param="file",
)

    # Verify the file is under the configured size limit
    # before reading it into memory (rather than writing it to disk)
check_file_size_under_limit(
request_data=data,
file=file,
router_model_names=router_model_names,
)

file_content = await file.read()
file_object = io.BytesIO(file_content)
file_object.name = file.filename
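
The pattern in this hunk, validate `UploadFile.size` before `await file.read()` pulls the body into memory, generalizes beyond LiteLLM. Below is a self-contained FastAPI sketch of the same idea; the route and the 1 MB cap are hypothetical, and it assumes a Starlette version recent enough to populate `UploadFile.size`.

```python
# Editor's sketch: reject oversized uploads before reading them into memory.
import io

from fastapi import FastAPI, HTTPException, UploadFile

app = FastAPI()
MAX_FILE_SIZE_MB = 1.0  # hypothetical cap, not a LiteLLM default

@app.post("/v1/audio/transcriptions")
async def transcribe(file: UploadFile):
    size_mb = (file.size or 0) / (1024 * 1024)
    if size_mb > MAX_FILE_SIZE_MB:
        # Fail fast: nothing has been read into memory yet
        raise HTTPException(status_code=400, detail="File size is too large.")
    buffer = io.BytesIO(await file.read())  # safe to read now
    buffer.name = file.filename
    return {"received_mb": round(size_mb, 4)}
```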
18 changes: 18 additions & 0 deletions litellm/router.py
@@ -3684,6 +3684,24 @@ def get_deployment(self, model_id: str) -> Optional[Deployment]:
raise Exception("Model invalid format - {}".format(type(model)))
return None

def get_deployment_by_model_group_name(
self, model_group_name: str
) -> Optional[Deployment]:
"""
Returns -> Deployment or None
Raise Exception -> if model found in invalid format
"""
        for model in self.model_list:
            if isinstance(model, dict):
                if model.get("model_name") == model_group_name:
                    return Deployment(**model)
            elif isinstance(model, Deployment):
                if model.model_name == model_group_name:
                    return model
            else:
                raise Exception("Model Name invalid - {}".format(type(model)))
return None

def get_router_model_info(self, deployment: dict) -> ModelMapInfo:
"""
For a given model id, return the model info (max tokens, input cost, output cost, etc.).
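
A hedged usage sketch for the new helper, assuming a `Router` built from a model list like the whisper entry in `proxy_config.yaml` above (the API key is a placeholder):

```python
# Illustration only: look up a deployment by its public model-group name.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "whisper",
            "litellm_params": {"model": "whisper-1", "api_key": "sk-..."},
        }
    ]
)

deployment = router.get_deployment_by_model_group_name(model_group_name="whisper")
if deployment is not None:
    print(deployment.litellm_params.model)  # "whisper-1"
```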
6 changes: 6 additions & 0 deletions litellm/types/router.py
@@ -154,6 +154,8 @@ class GenericLiteLLMParams(BaseModel):
input_cost_per_second: Optional[float] = None
output_cost_per_second: Optional[float] = None

max_file_size_mb: Optional[float] = None

model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

def __init__(
@@ -185,6 +187,7 @@ def __init__(
output_cost_per_token: Optional[float] = None,
input_cost_per_second: Optional[float] = None,
output_cost_per_second: Optional[float] = None,
max_file_size_mb: Optional[float] = None,
**params,
):
args = locals()
@@ -243,6 +246,9 @@ def __init__(
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_region_name: Optional[str] = None,
# OpenAI / Azure Whisper
        # max size (in MB) of a file that can be passed to the litellm proxy
max_file_size_mb: Optional[float] = None,
**params,
):
args = locals()
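
Because `max_file_size_mb` is now a first-class field on the params models, it can also be set programmatically rather than via YAML. A small sketch, assuming `LiteLLM_Params` is importable as shown in this diff (values are placeholders):

```python
# Sketch: constructing params with the field added in this PR.
from litellm.types.router import LiteLLM_Params

params = LiteLLM_Params(
    model="whisper-1",
    max_file_size_mb=2.5,  # reject uploads larger than ~2.5 MB
)
print(params.max_file_size_mb)  # 2.5
```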
