Retry on connection disconnect #4178

Merged · 32 commits · Jun 14, 2024

Commits
d45db9a
fix(http_handler.py): retry on httpx connection errors
krrishdholakia Jun 13, 2024
46d5752
fix(http_handler.py): add retry logic on httpx.remoteprotocolerror
krrishdholakia Jun 13, 2024
e93727b
test(test_router_debug_logs.py): fix test
krrishdholakia Jun 14, 2024
ba88264
fix - fix redacting messages litellm
ishaan-jaff Jun 13, 2024
a166db1
fix config
ishaan-jaff Jun 13, 2024
cb0639d
fix - redacting messages
ishaan-jaff Jun 13, 2024
6a674b5
test test_redact_msgs_from_logs
ishaan-jaff Jun 13, 2024
ec027b7
docs(alerting.md): add microsoft teams alerting to docs
krrishdholakia Jun 13, 2024
4408c78
docs(alerting.md): add expected response teams alerting image to docs
krrishdholakia Jun 13, 2024
ddb60ed
fix(caching.py): Stop throwing constant spam errors on every single S…
Manouchehri Jun 13, 2024
1a9ab1d
fix - clean up swagger spend endpoints
ishaan-jaff Jun 13, 2024
330df10
feat - add remaining team budget gauge
ishaan-jaff Jun 13, 2024
8b7b2ee
feat - add remaining budget for key on prometheus
ishaan-jaff Jun 13, 2024
0f94133
docs - budget metrics litellm
ishaan-jaff Jun 13, 2024
6d44c56
fix bug when updating team
ishaan-jaff Jun 13, 2024
f6d3865
fix - ui show correct team budget when budget = 0.0
ishaan-jaff Jun 13, 2024
86d64c3
build(ui/teams.tsx): allow resetting teams budget
krrishdholakia Jun 13, 2024
1450a77
build: allow resetting customer budget weekly + edit customer budget …
krrishdholakia Jun 13, 2024
b37e471
feat(__init__.py): allow setting drop_params as an env
krrishdholakia Jun 13, 2024
9598209
doc - setting team budgets
ishaan-jaff Jun 13, 2024
62ff15f
fix /team/update
ishaan-jaff Jun 13, 2024
397a3d8
doc - team based budgets
ishaan-jaff Jun 13, 2024
ecdb817
doc - setting team budgets
ishaan-jaff Jun 14, 2024
17b8a31
update swagger for /team endpoints
ishaan-jaff Jun 14, 2024
39c0f29
fix - update team
ishaan-jaff Jun 14, 2024
bf04085
bump: version 1.40.10 → 1.40.11
ishaan-jaff Jun 14, 2024
80aa492
doc fix creating team budgets
ishaan-jaff Jun 14, 2024
17556cf
llama 3
themrzmaster Jun 14, 2024
8498600
feat(proxy/utils.py): allow budget duration in months
krrishdholakia Jun 13, 2024
acdd8f1
build(ui): new build
krrishdholakia Jun 14, 2024
7117e56
bump: version 1.40.11 → 1.40.12
krrishdholakia Jun 14, 2024
64f50c0
docs(team_budgets.md): fix docs
krrishdholakia Jun 14, 2024
50 changes: 49 additions & 1 deletion docs/my-website/docs/proxy/alerting.md
@@ -1,3 +1,5 @@
import Image from '@theme/IdealImage';

# 🚨 Alerting / Webhooks

Get alerts for:
@@ -15,6 +17,11 @@ Get alerts for:
- **Spend** Weekly & Monthly spend per Team, Tag


Works across:
- [Slack](#quick-start)
- [Discord](#advanced---using-discord-webhooks)
- [Microsoft Teams](#advanced---using-ms-teams-webhooks)

## Quick Start

Set up a Slack alert channel to receive alerts from the proxy.
@@ -108,6 +115,48 @@ AlertType = Literal[
```


## Advanced - Using MS Teams Webhooks

MS Teams provides a Slack-compatible webhook URL that you can use for alerting.

##### Quick Start

1. [Get a webhook url](https://learn.microsoft.com/en-us/microsoftteams/platform/webhooks-and-connectors/how-to/add-incoming-webhook?tabs=newteams%2Cdotnet#create-an-incoming-webhook) for your Microsoft Teams channel

2. Add it to your .env

```bash
SLACK_WEBHOOK_URL="https://berriai.webhook.office.com/webhookb2/...6901/IncomingWebhook/b55fa0c2a48647be8e6effedcd540266/e04b1092-4a3e-44a2-ab6b-29a0a4854d1d"
```

3. Add it to your litellm config

```yaml
model_list:
  - model_name: "azure-model"
    litellm_params:
      model: "azure/gpt-35-turbo"
      api_key: "my-bad-key" # 👈 bad key

general_settings:
  alerting: ["slack"]
  alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
```

4. Run health check!

Call the proxy `/health/services` endpoint to test whether your alerting connection is correctly set up.

```bash
curl --location 'http://0.0.0.0:4000/health/services?service=slack' \
--header 'Authorization: Bearer sk-1234'
```


**Expected Response**

<Image img={require('../../img/ms_teams_alerting.png')}/>
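If the health check fails, you can also test the webhook directly by posting a Slack-style payload to it. A minimal sketch, assuming your Teams webhook URL is exported as `SLACK_WEBHOOK_URL` (Slack-compatible webhooks accept a simple `{"text": ...}` JSON body):

```python
import os

import requests

# Assumes SLACK_WEBHOOK_URL holds the MS Teams incoming-webhook URL from step 2.
webhook_url = os.environ["SLACK_WEBHOOK_URL"]

# Slack-compatible webhooks accept a JSON body with a "text" field.
resp = requests.post(webhook_url, json={"text": "LiteLLM alerting test 🚨"})
resp.raise_for_status()  # a non-2xx status means the URL is wrong or revoked
print("Webhook accepted the test message:", resp.status_code)
```

If this test message shows up in your Teams channel but proxy alerts do not, the problem is in the proxy config rather than the webhook itself.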

## Advanced - Using Discord Webhooks

Discord provides a Slack-compatible webhook URL that you can use for alerting.
@@ -139,7 +188,6 @@ environment_variables:
SLACK_WEBHOOK_URL: "https://discord.com/api/webhooks/1240030362193760286/cTLWt5ATn1gKmcy_982rl5xmYHsrM1IWJdmCL1AyOmU9JdQXazrp8L1_PYgUtgxj8x4f/slack"
```

That's it! You're ready to go!

## Advanced - [BETA] Webhooks for Budget Alerts

9 changes: 8 additions & 1 deletion docs/my-website/docs/proxy/prometheus.md
@@ -1,4 +1,4 @@
# Grafana, Prometheus metrics [BETA]
# 📈 Prometheus metrics [BETA]

LiteLLM exposes a `/metrics` endpoint for Prometheus to poll

@@ -54,6 +54,13 @@ http://localhost:4000/metrics
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
| `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |

### Budget Metrics
| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_remaining_team_budget_metric` | Remaining budget for a team (a team created on LiteLLM) |
| `litellm_remaining_api_key_budget_metric` | Remaining budget for an API key (a key created on LiteLLM) |


## Monitor System Health

To monitor the health of LiteLLM-adjacent services (Redis / Postgres), do:
123 changes: 123 additions & 0 deletions docs/my-website/docs/proxy/team_budgets.md
@@ -0,0 +1,123 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 💰 Setting Team Budgets

Track spend and set budgets for your internal teams.

## Setting Monthly Team Budgets

### 1. Create a team
- Set `max_budget=0.000000001` (the $ value the team is allowed to spend)
- Set `budget_duration="1d"` (how frequently the budget should reset)


Create a new team and set `max_budget` and `budget_duration`
```shell
curl -X POST 'http://0.0.0.0:4000/team/new' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
  "team_alias": "QA Prod Bot",
  "max_budget": 0.000000001,
  "budget_duration": "1d"
}'
```

Response
```shell
{
  "team_alias": "QA Prod Bot",
  "team_id": "de35b29e-6ca8-4f47-b804-2b79d07aa99a",
  "max_budget": 1e-09,
  "budget_duration": "1d",
  "budget_reset_at": "2024-06-14T22:48:36.594000Z"
}
```



Possible values for `budget_duration`

| `budget_duration` | When Budget will reset |
| --- | --- |
| `budget_duration="1s"` | every 1 second |
| `budget_duration="1m"` | every 1 min |
| `budget_duration="1h"` | every 1 hour |
| `budget_duration="1d"` | every 1 day |
| `budget_duration="1mo"` | start of every month |


### 2. Create a key for the `team`

Create a key for `team_id="de35b29e-6ca8-4f47-b804-2b79d07aa99a"` from Step 1

💡 **The budget for team "QA Prod Bot" will apply to requests made with this key**

```shell
curl -X POST 'http://0.0.0.0:4000/key/generate' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{"team_id": "de35b29e-6ca8-4f47-b804-2b79d07aa99a"}'
```

Response

```shell
{"team_id":"de35b29e-6ca8-4f47-b804-2b79d07aa99a", "key":"sk-5qtncoYjzRcxMM4bDRktNQ"}
```


### 3. Test It

Use the key from step 2 and run this request twice
```shell
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Authorization: Bearer sk-5qtncoYjzRcxMM4bDRktNQ' \
-H 'Content-Type: application/json' \
-d '{
  "model": "llama3",
  "messages": [
    {
      "role": "user",
      "content": "hi"
    }
  ]
}'
```

On the second request, expect to see the following exception:

```shell
{
  "error": {
    "message": "Budget has been exceeded! Current cost: 3.5e-06, Max budget: 1e-09",
    "type": "auth_error",
    "param": null,
    "code": 400
  }
}
```
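Client code sees this as a normal HTTP error from an OpenAI-compatible endpoint. A minimal sketch of handling it with the OpenAI Python SDK pointed at the proxy, using the team-scoped key from step 2:

```python
import openai

# Point the OpenAI SDK at the LiteLLM proxy; the key is the one from step 2.
client = openai.OpenAI(
    api_key="sk-5qtncoYjzRcxMM4bDRktNQ",
    base_url="http://0.0.0.0:4000",
)

try:
    client.chat.completions.create(
        model="llama3",
        messages=[{"role": "user", "content": "hi"}],
    )
except openai.APIStatusError as e:
    # Once the team budget is exhausted, the proxy returns the
    # "Budget has been exceeded!" body shown above with a 400 status.
    print("Request rejected:", e.status_code)
```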

## Advanced

### Prometheus metrics for `remaining_budget`

[More info about Prometheus metrics here](https://docs.litellm.ai/docs/proxy/prometheus)

You'll need the following in your proxy config.yaml

```yaml
litellm_settings:
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]
```

Expect to see this metric on Prometheus, tracking the remaining budget for the team:

```shell
litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-6ca8-4f47-b804-2b79d07aa99a"} 9.699999999999992e-06
```
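To verify the gauge programmatically, you can scrape and parse the `/metrics` endpoint yourself. A sketch, assuming the `prometheus_client` package is installed:

```python
import requests
from prometheus_client.parser import text_string_to_metric_families

# Scrape the LiteLLM proxy's Prometheus endpoint directly.
raw = requests.get("http://0.0.0.0:4000/metrics").text

for family in text_string_to_metric_families(raw):
    if family.name.startswith("litellm_remaining"):
        for sample in family.samples:
            # e.g. litellm_remaining_team_budget_metric{team_alias=...} 9.7e-06
            print(sample.name, sample.labels, sample.value)
```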


Binary file added docs/my-website/img/ms_teams_alerting.png
3 changes: 2 additions & 1 deletion docs/my-website/sidebars.js
@@ -44,6 +44,7 @@ const sidebars = {
"proxy/self_serve",
"proxy/users",
"proxy/customers",
"proxy/team_budgets",
"proxy/billing",
"proxy/user_keys",
"proxy/virtual_keys",
@@ -54,6 +55,7 @@
items: ["proxy/logging", "proxy/streaming_logging"],
},
"proxy/ui",
"proxy/prometheus",
"proxy/email",
"proxy/multiple_admins",
"proxy/team_based_routing",
@@ -70,7 +72,6 @@
"proxy/pii_masking",
"proxy/prompt_injection",
"proxy/caching",
"proxy/prometheus",
"proxy/call_hooks",
"proxy/rules",
"proxy/cli",
2 changes: 1 addition & 1 deletion litellm/__init__.py
@@ -73,7 +73,7 @@
)
telemetry = True
max_tokens = 256 # OpenAI Defaults
drop_params = False
drop_params = bool(os.getenv("LITELLM_DROP_PARAMS", False))
modify_params = False
retry = True
### AUTH ###
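One caveat on the `drop_params` change above: `bool(os.getenv("LITELLM_DROP_PARAMS", False))` is truthy for any non-empty string, including `"False"`. A stricter parse, shown here as an illustrative sketch rather than the behavior shipped in this PR, would compare against explicit spellings:

```python
import os

def env_flag(name: str, default: bool = False) -> bool:
    """Parse an env var as a boolean; note that bool("False") would be True."""
    value = os.getenv(name)
    if value is None:
        return default
    # Accept only explicit truthy spellings instead of raw string truthiness.
    return value.strip().lower() in ("1", "true", "yes", "on")

drop_params = env_flag("LITELLM_DROP_PARAMS")
```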
2 changes: 1 addition & 1 deletion litellm/caching.py
@@ -1192,7 +1192,7 @@ def get_cache(self, key, **kwargs):
return cached_response
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey":
verbose_logger.error(
verbose_logger.debug(
f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
)
return None
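The change above demotes a routine S3 cache miss from `error` to `debug`, since a missing key is expected behavior for a cache. A standalone sketch of the same pattern with boto3 (the bucket name is a placeholder):

```python
import logging

import boto3
import botocore.exceptions

logger = logging.getLogger("s3_cache")
s3 = boto3.client("s3")

def get_cached(key: str, bucket: str = "my-cache-bucket"):
    try:
        obj = s3.get_object(Bucket=bucket, Key=key)
        return obj["Body"].read()
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "NoSuchKey":
            # A cache miss is normal operation; log quietly and return None.
            logger.debug("S3 cache miss for key %r", key)
            return None
        raise  # anything else (auth, throttling) is a real error
```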
30 changes: 15 additions & 15 deletions litellm/exceptions.py
@@ -26,7 +26,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 401
self.message = message
self.message = "litellm.AuthenticationError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
@@ -72,7 +72,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 404
self.message = message
self.message = "litellm.NotFoundError: {}".format(message)
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
@@ -117,7 +117,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 400
self.message = message
self.message = "litellm.BadRequestError: {}".format(message)
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
@@ -162,7 +162,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 422
self.message = message
self.message = "litellm.UnprocessableEntityError: {}".format(message)
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
@@ -204,7 +204,7 @@ def __init__(
request=request
) # Call the base class constructor with the parameters it needs
self.status_code = 408
self.message = message
self.message = "litellm.Timeout: {}".format(message)
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
@@ -241,7 +241,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 403
self.message = message
self.message = "litellm.PermissionDeniedError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
@@ -280,7 +280,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 429
self.message = message
self.message = "litellm.RateLimitError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
@@ -328,7 +328,7 @@ def __init__(
litellm_debug_info: Optional[str] = None,
):
self.status_code = 400
self.message = message
self.message = "litellm.ContextWindowExceededError: {}".format(message)
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
@@ -368,7 +368,7 @@ def __init__(
litellm_debug_info: Optional[str] = None,
):
self.status_code = 400
self.message = message
self.message = "litellm.RejectedRequestError: {}".format(message)
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
@@ -411,7 +411,7 @@ def __init__(
litellm_debug_info: Optional[str] = None,
):
self.status_code = 400
self.message = message
self.message = "litellm.ContentPolicyViolationError: {}".format(message)
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
@@ -452,7 +452,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 503
self.message = message
self.message = "litellm.ServiceUnavailableError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
@@ -501,7 +501,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = 500
self.message = message
self.message = "litellm.InternalServerError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
@@ -552,7 +552,7 @@ def __init__(
num_retries: Optional[int] = None,
):
self.status_code = status_code
self.message = message
self.message = "litellm.APIError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
@@ -589,7 +589,7 @@ def __init__(
max_retries: Optional[int] = None,
num_retries: Optional[int] = None,
):
self.message = message
self.message = "litellm.APIConnectionError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
self.status_code = 500
@@ -626,7 +626,7 @@ def __init__(
max_retries: Optional[int] = None,
num_retries: Optional[int] = None,
):
self.message = message
self.message = "litellm.APIResponseValidationError: {}".format(message)
self.llm_provider = llm_provider
self.model = model
request = httpx.Request(method="POST", url="https://api.openai.com/v1")
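Taken together, the changes in `exceptions.py` prefix every exception message with its fully qualified class name, which makes logs and error traces easier to grep. A minimal sketch of what callers see after this PR (the bad key is deliberate):

```python
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        api_key="bad-key",  # deliberately invalid to trigger the error
    )
except litellm.AuthenticationError as e:
    # After this PR the message carries the class-name prefix, e.g.
    # "litellm.AuthenticationError: ..."
    print(e.message)
```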