Skip to content

Commit

Permalink
Merge pull request BerriAI#3360 from BerriAI/litellm_random_pick_lowest_latency
Browse files Browse the repository at this point in the history

[Fix] Lowest Latency routing - random pick deployments when all latencies=0
  • Loading branch information
ishaan-jaff committed Apr 29, 2024
2 parents 77f155d + 4cb4a7f commit d58dd2c
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 0 deletions.
4 changes: 4 additions & 0 deletions litellm/router_strategy/lowest_latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ def get_available_deployments(
except:
input_tokens = 0

# randomly sample from all_deployments, incase all deployments have latency=0.0
_items = all_deployments.items()
all_deployments = random.sample(list(_items), len(_items))
all_deployments = dict(all_deployments)
for item, item_map in all_deployments.items():
## get the item from model list
_deployment = None
Expand Down
76 changes: 76 additions & 0 deletions litellm/tests/test_lowest_latency_routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,3 +555,79 @@ async def test_lowest_latency_routing_with_timeouts():

# ALL the Requests should have been routed to the fast-endpoint
assert deployments["fast-endpoint"] == 10


@pytest.mark.asyncio
async def test_lowest_latency_routing_first_pick():
    """
    PROD Test: with four identical deployments whose recorded latencies are
    all 0, latency-based routing must sample randomly instead of always
    returning the very first deployment.

    A tiny ``ttl`` forces the latency cache window to reset between calls,
    so across several requests more than one deployment id should be picked.
    """
    import litellm

    litellm.set_verbose = True

    # Four deployments in one model group; distinct model_info ids let us
    # track which deployment served each request.
    def _deployment(model: str, deployment_id: str) -> dict:
        return {
            "model_name": "azure-model",
            "litellm_params": {
                "model": model,
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "api_key": "fake-key",
            },
            "model_info": {"id": deployment_id},
        }

    router = Router(
        model_list=[
            _deployment("openai/fast-endpoint", "fast-endpoint"),
            _deployment("openai/fast-endpoint-2", "fast-endpoint-2"),
            _deployment("openai/fast-endpoint-2", "fast-endpoint-3"),
            _deployment("openai/fast-endpoint-2", "fast-endpoint-4"),
        ],
        routing_strategy="latency-based-routing",
        routing_strategy_args={"ttl": 0.0000000001},
        set_verbose=True,
        debug_level="DEBUG",
    )  # type: ignore

    # Count how many times each deployment id is chosen.
    deployments: dict = {}
    for _ in range(5):
        response = await router.acompletion(
            model="azure-model", messages=[{"role": "user", "content": "hello"}]
        )
        print(response)
        picked = response._hidden_params["model_id"]
        deployments[picked] = deployments.get(picked, 0) + 1
        await asyncio.sleep(0.000000000005)

    print("deployments", deployments)

    # Random selection over 4 equal-latency deployments should hit more
    # than one distinct deployment across 5 requests.
    assert len(deployments) > 1

0 comments on commit d58dd2c

Please sign in to comment.