Skip to content

Commit

Permalink
Merge pull request BerriAI#3360 from BerriAI/litellm_random_pick_lowest_latency
Browse files Browse the repository at this point in the history

[Fix] Lowest Latency routing - random pick deployments when all latencies=0
  • Loading branch information
ishaan-jaff committed Apr 29, 2024
2 parents 77f155d + 4cb4a7f commit d58dd2c
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 0 deletions.
4 changes: 4 additions & 0 deletions litellm/router_strategy/lowest_latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ def get_available_deployments(
except:
input_tokens = 0

# randomly sample from all_deployments, incase all deployments have latency=0.0
_items = all_deployments.items()
all_deployments = random.sample(list(_items), len(_items))
all_deployments = dict(all_deployments)
for item, item_map in all_deployments.items():
## get the item from model list
_deployment = None
Expand Down
76 changes: 76 additions & 0 deletions litellm/tests/test_lowest_latency_routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,3 +555,79 @@ async def test_lowest_latency_routing_with_timeouts():

# ALL the Requests should have been routed to the fast-endpoint
assert deployments["fast-endpoint"] == 10


@pytest.mark.asyncio
async def test_lowest_latency_routing_first_pick():
    """
    PROD Test: with four identical deployments whose recorded latencies are
    all 0, latency-based routing must sample randomly instead of always
    returning the very first deployment.

    A tiny ``ttl`` forces the latency cache window to reset between calls,
    so across several requests more than one deployment id should be picked.
    """
    import litellm

    litellm.set_verbose = True

    # Four deployments in one model group; distinct model_info ids let us
    # track which deployment served each request.
    def _deployment(model: str, deployment_id: str) -> dict:
        return {
            "model_name": "azure-model",
            "litellm_params": {
                "model": model,
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "api_key": "fake-key",
            },
            "model_info": {"id": deployment_id},
        }

    router = Router(
        model_list=[
            _deployment("openai/fast-endpoint", "fast-endpoint"),
            _deployment("openai/fast-endpoint-2", "fast-endpoint-2"),
            _deployment("openai/fast-endpoint-2", "fast-endpoint-3"),
            _deployment("openai/fast-endpoint-2", "fast-endpoint-4"),
        ],
        routing_strategy="latency-based-routing",
        routing_strategy_args={"ttl": 0.0000000001},
        set_verbose=True,
        debug_level="DEBUG",
    )  # type: ignore

    # Count how many times each deployment id is chosen.
    deployments: dict = {}
    for _ in range(5):
        response = await router.acompletion(
            model="azure-model", messages=[{"role": "user", "content": "hello"}]
        )
        print(response)
        picked = response._hidden_params["model_id"]
        deployments[picked] = deployments.get(picked, 0) + 1
        await asyncio.sleep(0.000000000005)

    print("deployments", deployments)

    # Random selection over 4 equal-latency deployments should hit more
    # than one distinct deployment across 5 requests.
    assert len(deployments) > 1

0 comments on commit d58dd2c

Please sign in to comment.