Skip to content

Commit

Permalink
fix(lowest_tpm_rpm_v2.py): add more detail to 'No deployments available' error message
Browse files Browse the repository at this point in the history
  • Loading branch information
krrishdholakia committed Apr 29, 2024
1 parent de3e642 commit f10a066
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 5 deletions.
6 changes: 3 additions & 3 deletions litellm/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -2807,7 +2807,7 @@ def _pre_call_checks(

if _rate_limit_error == True: # allow generic fallback logic to take place
raise ValueError(
f"No deployments available for selected model, passed model={model}"
f"{RouterErrors.no_deployments_available.value}, passed model={model}"
)
elif _context_window_error == True:
raise litellm.ContextWindowExceededError(
Expand Down Expand Up @@ -3000,7 +3000,7 @@ async def async_get_available_deployment(
f"get_available_deployment for model: {model}, No deployment available"
)
raise ValueError(
f"No deployments available for selected model, passed model={model}"
f"{RouterErrors.no_deployments_available.value}, passed model={model}"
)
verbose_router_logger.info(
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
Expand Down Expand Up @@ -3130,7 +3130,7 @@ def get_available_deployment(
f"get_available_deployment for model: {model}, No deployment available"
)
raise ValueError(
f"No deployments available for selected model, passed model={model}"
f"{RouterErrors.no_deployments_available.value}, passed model={model}"
)
verbose_router_logger.info(
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
Expand Down
115 changes: 113 additions & 2 deletions litellm/router_strategy/lowest_tpm_rpm_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ async def async_get_available_deployments(

dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M")

tpm_keys = []
rpm_keys = []
for m in healthy_deployments:
Expand All @@ -416,7 +417,7 @@ async def async_get_available_deployments(
tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]

return self._common_checks_available_deployment(
deployment = self._common_checks_available_deployment(
model_group=model_group,
healthy_deployments=healthy_deployments,
tpm_keys=tpm_keys,
Expand All @@ -427,6 +428,61 @@ async def async_get_available_deployments(
input=input,
)

try:
assert deployment is not None
return deployment
except Exception as e:
### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
deployment_dict = {}
for index, _deployment in enumerate(healthy_deployments):
if isinstance(_deployment, dict):
id = _deployment.get("model_info", {}).get("id")
### GET DEPLOYMENT TPM LIMIT ###
_deployment_tpm = None
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("tpm", None)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("litellm_params", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("model_info", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = float("inf")

### GET CURRENT TPM ###
current_tpm = tpm_values[index]

### GET DEPLOYMENT TPM LIMIT ###
_deployment_rpm = None
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("rpm", None)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("litellm_params", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("model_info", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = float("inf")

### GET CURRENT RPM ###
current_rpm = rpm_values[index]

deployment_dict[id] = {
"current_tpm": current_tpm,
"tpm_limit": _deployment_tpm,
"current_rpm": current_rpm,
"rpm_limit": _deployment_rpm,
}
raise ValueError(
f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
)

def get_available_deployments(
self,
model_group: str,
Expand Down Expand Up @@ -464,7 +520,7 @@ def get_available_deployments(
keys=rpm_keys
) # [1, 2, None, ..]

return self._common_checks_available_deployment(
deployment = self._common_checks_available_deployment(
model_group=model_group,
healthy_deployments=healthy_deployments,
tpm_keys=tpm_keys,
Expand All @@ -474,3 +530,58 @@ def get_available_deployments(
messages=messages,
input=input,
)

try:
assert deployment is not None
return deployment
except Exception as e:
### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
deployment_dict = {}
for index, _deployment in enumerate(healthy_deployments):
if isinstance(_deployment, dict):
id = _deployment.get("model_info", {}).get("id")
### GET DEPLOYMENT TPM LIMIT ###
_deployment_tpm = None
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("tpm", None)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("litellm_params", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("model_info", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = float("inf")

### GET CURRENT TPM ###
current_tpm = tpm_values[index]

### GET DEPLOYMENT TPM LIMIT ###
_deployment_rpm = None
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("rpm", None)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("litellm_params", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("model_info", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = float("inf")

### GET CURRENT RPM ###
current_rpm = rpm_values[index]

deployment_dict[id] = {
"current_tpm": current_tpm,
"tpm_limit": _deployment_tpm,
"current_rpm": current_rpm,
"rpm_limit": _deployment_rpm,
}
raise ValueError(
f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
)
1 change: 1 addition & 0 deletions litellm/types/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,4 @@ class RouterErrors(enum.Enum):
"""

user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
no_deployments_available = "No deployments available for selected model"

0 comments on commit f10a066

Please sign in to comment.