Added LiteLLM to the stack

Development/litellm/tests/otel_tests/test_prometheus.py (new file, 579 lines)

@@ -0,0 +1,579 @@
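# NOTE: these tests assume a LiteLLM proxy already running at http://0.0.0.0:4000
# with master key "sk-1234" and the "fake-openai-endpoint" / "fake-azure-endpoint"
# test models configured (all of these values are hard-coded in the requests below).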
"""
Unit tests for prometheus metrics
"""

import pytest
import aiohttp
import asyncio
import uuid
import os
import sys
from openai import AsyncOpenAI
from typing import Dict, Any

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

END_USER_ID = "my-test-user-34"


async def make_bad_chat_completion_request(session, key):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "fake-azure-endpoint",
        "messages": [{"role": "user", "content": "Hello"}],
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        return status, response_text


async def make_good_chat_completion_request(session, key):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    data = {
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": f"Hello {uuid.uuid4()}"}],
        "tags": ["teamB"],
        "user": END_USER_ID,  # test if disable end user tracking for prometheus works
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        return status, response_text


async def make_chat_completion_request_with_fallback(session, key):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "fake-azure-endpoint",
        "messages": [{"role": "user", "content": "Hello"}],
        "fallbacks": ["fake-openai-endpoint"],
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

    # make a request with a failed fallback
    data = {
        "model": "fake-azure-endpoint",
        "messages": [{"role": "user", "content": "Hello"}],
        "fallbacks": ["unknown-model"],
    }

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

    return


@pytest.mark.asyncio
async def test_proxy_failure_metrics():
    """
    - Make 1 bad chat completion call to "fake-azure-endpoint"
    - GET /metrics
    - Assert the failure metric for the requested model is incremented by 1
    - Assert the Exception class and status code are correct
    """
    async with aiohttp.ClientSession() as session:
        # Make a bad chat completion call
        status, response_text = await make_bad_chat_completion_request(
            session, "sk-1234"
        )

        # Check if the request failed as expected
        assert status == 429, f"Expected status 429, but got {status}"

        # Get metrics
        async with session.get("http://0.0.0.0:4000/metrics") as response:
            metrics = await response.text()

        print("/metrics", metrics)

        # Check if the failure metric is present and correct
        expected_metric = 'litellm_proxy_failed_requests_metric_total{api_key_alias="None",end_user="None",exception_class="Openai.RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",route="/chat/completions",team="None",team_alias="None",user="default_user_id"} 1.0'

        assert (
            expected_metric in metrics
        ), "Expected failure metric not found in /metrics."
        expected_llm_deployment_failure = 'litellm_deployment_failure_responses_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",status_code="429",team="None",team_alias="None",user="default_user_id",user_email="None"} 1.0'
        assert (
            expected_llm_deployment_failure in metrics
        ), "Expected deployment failure metric not found in /metrics."

        assert (
            'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",route="/chat/completions",status_code="429",team="None",team_alias="None",user="default_user_id",user_email="None"} 1.0'
            in metrics
        )

        assert (
            'litellm_deployment_failure_responses_total{api_base="https://exampleopenaiendpoint-production.up.railway.app",api_key_alias="None",api_provider="openai",exception_class="Openai.RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="429",model_id="7499d31f98cd518cf54486d5a00deda6894239ce16d13543398dc8abf870b15f",requested_model="fake-azure-endpoint",team="None",team_alias="None"}'
            in metrics
        )


@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=2)
async def test_proxy_success_metrics():
"""
|
||||
Make 1 good /chat/completions call to "openai/gpt-3.5-turbo"
|
||||
GET /metrics
|
||||
Assert the success metric is incremented by 1
|
||||
"""

    async with aiohttp.ClientSession() as session:
        # Make a good chat completion call
        status, response_text = await make_good_chat_completion_request(
            session, "sk-1234"
        )

        # Check if the request succeeded as expected
        assert status == 200, f"Expected status 200, but got {status}"

        # Get metrics
        async with session.get("http://0.0.0.0:4000/metrics") as response:
            metrics = await response.text()

        print("/metrics", metrics)

        assert END_USER_ID not in metrics

        # Check if the success metric is present and correct
        assert (
            'litellm_request_total_latency_metric_bucket{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",requested_model="fake-openai-endpoint",team="None",team_alias="None",user="default_user_id"}'
            in metrics
        )

        assert (
            'litellm_llm_api_latency_metric_bucket{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",requested_model="fake-openai-endpoint",team="None",team_alias="None",user="default_user_id"}'
            in metrics
        )

        verify_latency_metrics(metrics)


def verify_latency_metrics(metrics: str):
    """
    Assert that LATENCY_BUCKETS distribution is used for
    - litellm_request_total_latency_metric_bucket
    - litellm_llm_api_latency_metric_bucket

    Very important to verify that the overhead latency metric is present
    """
    from litellm.types.integrations.prometheus import LATENCY_BUCKETS
    import re
    import time

    time.sleep(2)

    metric_names = [
        "litellm_request_total_latency_metric_bucket",
        "litellm_llm_api_latency_metric_bucket",
        "litellm_overhead_latency_metric_bucket",
    ]

    for metric_name in metric_names:
        # Extract all 'le' values for the current metric
        pattern = rf'{metric_name}{{.*?le="(.*?)".*?}}'
        le_values = re.findall(pattern, metrics)

        # Convert to set for easier comparison
        actual_buckets = set(le_values)

        print("actual_buckets", actual_buckets)
        expected_buckets = []
        for bucket in LATENCY_BUCKETS:
            expected_buckets.append(str(bucket))

        # replace inf with +Inf
        expected_buckets = [
            bucket.replace("inf", "+Inf") for bucket in expected_buckets
        ]

        print("expected_buckets", expected_buckets)
        expected_buckets = set(expected_buckets)
        # Verify all expected buckets are present
        assert (
            actual_buckets == expected_buckets
        ), f"Mismatch in {metric_name} buckets. Expected: {expected_buckets}, Got: {actual_buckets}"
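

# Illustrative only: hypothetical exposition lines of the shape the bucket regex
# above is written to match -- one `le` label per LATENCY_BUCKETS boundary, with
# inf rendered by the Prometheus client as "+Inf" (label values invented):
#   litellm_request_total_latency_metric_bucket{...,le="0.005",...} 1.0
#   litellm_request_total_latency_metric_bucket{...,le="+Inf",...} 1.0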


@pytest.mark.asyncio
async def test_proxy_fallback_metrics():
    """
    Make 1 request with a client side fallback - check metrics
    """

    async with aiohttp.ClientSession() as session:
        # Make a chat completion call with a client-side fallback
        await make_chat_completion_request_with_fallback(session, "sk-1234")

        # Get metrics
        async with session.get("http://0.0.0.0:4000/metrics") as response:
            metrics = await response.text()

        print("/metrics", metrics)

        # Check if successful fallback metric is incremented
        assert (
            'litellm_deployment_successful_fallbacks_total{api_key_alias="None",exception_class="Openai.RateLimitError",exception_status="429",fallback_model="fake-openai-endpoint",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
            in metrics
        )

        # Check if failed fallback metric is incremented
        assert (
            'litellm_deployment_failed_fallbacks_total{api_key_alias="None",exception_class="Openai.RateLimitError",exception_status="429",fallback_model="unknown-model",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
            in metrics
        )


async def create_test_team(
    session: aiohttp.ClientSession, team_data: Dict[str, Any]
) -> str:
    """Create a new team and return the team_id"""
    url = "http://0.0.0.0:4000/team/new"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }

    async with session.post(url, headers=headers, json=team_data) as response:
        assert (
            response.status == 200
        ), f"Failed to create team. Status: {response.status}"
        team_info = await response.json()
        return team_info["team_id"]


async def create_test_user(
    session: aiohttp.ClientSession, user_data: Dict[str, Any]
) -> Dict[str, Any]:
    """Create a new user and return the user info (includes the user's key)"""
    url = "http://0.0.0.0:4000/user/new"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }

    async with session.post(url, headers=headers, json=user_data) as response:
        assert (
            response.status == 200
        ), f"Failed to create user. Status: {response.status}"
        user_info = await response.json()
        return user_info


async def get_prometheus_metrics(session: aiohttp.ClientSession) -> str:
    """Fetch current prometheus metrics"""
    async with session.get("http://0.0.0.0:4000/metrics") as response:
        assert response.status == 200
        return await response.text()


def extract_budget_metrics(metrics_text: str, team_id: str) -> Dict[str, float]:
    """Extract budget-related metrics for a specific team"""
    import re

    metrics = {}

    # Get remaining budget
    remaining_pattern = f'litellm_remaining_team_budget_metric{{team="{team_id}",team_alias="[^"]*"}} ([0-9.]+)'
    remaining_match = re.search(remaining_pattern, metrics_text)
    metrics["remaining"] = float(remaining_match.group(1)) if remaining_match else None

    # Get total budget
    total_pattern = f'litellm_team_max_budget_metric{{team="{team_id}",team_alias="[^"]*"}} ([0-9.]+)'
    total_match = re.search(total_pattern, metrics_text)
    metrics["total"] = float(total_match.group(1)) if total_match else None

    # Get remaining hours
    hours_pattern = f'litellm_team_budget_remaining_hours_metric{{team="{team_id}",team_alias="[^"]*"}} ([0-9.]+)'
    hours_match = re.search(hours_pattern, metrics_text)
    metrics["remaining_hours"] = float(hours_match.group(1)) if hours_match else None

    return metrics
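

# Illustrative only: hypothetical metric lines of the shape the three patterns
# above are written to match (label values invented):
#   litellm_remaining_team_budget_metric{team="<team_id>",team_alias="budget_test_team"} 9.99
#   litellm_team_max_budget_metric{team="<team_id>",team_alias="budget_test_team"} 10.0
#   litellm_team_budget_remaining_hours_metric{team="<team_id>",team_alias="budget_test_team"} 167.5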


async def create_test_key(session: aiohttp.ClientSession, team_id: str) -> str:
    """Generate a new key for the team and return it"""
    url = "http://0.0.0.0:4000/key/generate"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }
    data = {
        "team_id": team_id,
    }

    async with session.post(url, headers=headers, json=data) as response:
        assert (
            response.status == 200
        ), f"Failed to generate key. Status: {response.status}"
        key_info = await response.json()
        return key_info["key"]


async def get_team_info(session: aiohttp.ClientSession, team_id: str) -> Dict[str, Any]:
    """Fetch team info and return the response"""
    url = f"http://0.0.0.0:4000/team/info?team_id={team_id}"
    headers = {
        "Authorization": "Bearer sk-1234",
    }

    async with session.get(url, headers=headers) as response:
        assert (
            response.status == 200
        ), f"Failed to get team info. Status: {response.status}"
        return await response.json()


@pytest.mark.asyncio
async def test_team_budget_metrics():
    """
    Test team budget tracking metrics:
    1. Create a team with max_budget
    2. Generate a key for the team
    3. Make chat completion requests using OpenAI SDK with team's key
    4. Verify budget decreases over time
    5. Verify request costs are being tracked correctly
    6. Verify prometheus metrics match /team/info spend data
    """
    async with aiohttp.ClientSession() as session:
        # Setup test team
        team_data = {
            "team_alias": "budget_test_team",
            "max_budget": 10,
            "budget_duration": "7d",
        }
        team_id = await create_test_team(session, team_data)
        print("team_id", team_id)
        # Generate key for the team
        team_key = await create_test_key(session, team_id)

        # Initialize OpenAI client with team's key
        client = AsyncOpenAI(base_url="http://0.0.0.0:4000", api_key=team_key)

        # Make initial request and check budget
        await client.chat.completions.create(
            model="fake-openai-endpoint",
            messages=[{"role": "user", "content": f"Hello {uuid.uuid4()}"}],
        )

        await asyncio.sleep(11)  # Wait for metrics to update

        # Get metrics after request
        metrics_after_first = await get_prometheus_metrics(session)
        print("metrics_after_first", metrics_after_first)
        first_budget = extract_budget_metrics(metrics_after_first, team_id)

        print(f"Budget after 1 request: {first_budget}")
        assert (
            first_budget["remaining"] < 10.0
        ), "remaining budget should be less than 10.0 after first request"
        assert first_budget["total"] == 10.0, "Total budget metric is incorrect"
        print("first_budget['remaining_hours']", first_budget["remaining_hours"])
        # Budget should have positive remaining hours, up to 7 days
        assert 0 < first_budget["remaining_hours"] <= 168, "Budget should have positive remaining hours, up to 7 days"

        # Get team info and verify spend matches prometheus metrics
        team_info = await get_team_info(session, team_id)
        print("team_info", team_info)
        _team_info_data = team_info["team_info"]

        # Calculate spend from prometheus (total - remaining)
        team_info_spend = float(_team_info_data["spend"])
        team_info_max_budget = float(_team_info_data["max_budget"])
        team_info_remaining_budget = team_info_max_budget - team_info_spend
        print("\n\n\n###### Final budget metrics ######\n\n\n")
        print("team_info_remaining_budget", team_info_remaining_budget)
        print("prometheus_remaining_budget", first_budget["remaining"])
        print(
            "diff between team_info_remaining_budget and prometheus_remaining_budget",
            team_info_remaining_budget - first_budget["remaining"],
        )

        # Verify spends match within a small delta (floating point comparison)
        assert (
            abs(team_info_remaining_budget - first_budget["remaining"]) <= 1e-6
        ), f"Spend mismatch: Team Info={team_info_remaining_budget}, Prometheus={first_budget['remaining']}"


async def create_test_key_with_budget(
    session: aiohttp.ClientSession, budget_data: Dict[str, Any]
) -> str:
    """Generate a new key with budget constraints and return it"""
    url = "http://0.0.0.0:4000/key/generate"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }
    print("budget_data", budget_data)

    async with session.post(url, headers=headers, json=budget_data) as response:
        assert (
            response.status == 200
        ), f"Failed to generate key. Status: {response.status}"
        key_info = await response.json()
        return key_info["key"]


async def get_key_info(session: aiohttp.ClientSession, key: str) -> Dict[str, Any]:
    """Fetch key info and return the response"""
    url = "http://0.0.0.0:4000/key/info"
    headers = {
        "Authorization": f"Bearer {key}",
    }

    async with session.get(url, headers=headers) as response:
        assert (
            response.status == 200
        ), f"Failed to get key info. Status: {response.status}"
        return await response.json()


def extract_key_budget_metrics(metrics_text: str, key_id: str) -> Dict[str, float]:
    """Extract budget-related metrics for a specific key"""
    import re

    metrics = {}

    # Get remaining budget
    remaining_pattern = f'litellm_remaining_api_key_budget_metric{{api_key_alias="[^"]*",hashed_api_key="{key_id}"}} ([0-9.]+)'
    remaining_match = re.search(remaining_pattern, metrics_text)
    metrics["remaining"] = float(remaining_match.group(1)) if remaining_match else None

    # Get total budget
    total_pattern = f'litellm_api_key_max_budget_metric{{api_key_alias="[^"]*",hashed_api_key="{key_id}"}} ([0-9.]+)'
    total_match = re.search(total_pattern, metrics_text)
    metrics["total"] = float(total_match.group(1)) if total_match else None

    # Get remaining hours
    hours_pattern = f'litellm_api_key_budget_remaining_hours_metric{{api_key_alias="[^"]*",hashed_api_key="{key_id}"}} ([0-9.]+)'
    hours_match = re.search(hours_pattern, metrics_text)
    metrics["remaining_hours"] = float(hours_match.group(1)) if hours_match else None

    return metrics
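

# Illustrative only: hypothetical metric lines of the shape the three patterns
# above are written to match (label values invented; the key appears as its hash):
#   litellm_remaining_api_key_budget_metric{api_key_alias="<alias>",hashed_api_key="<key_id>"} 9.99
#   litellm_api_key_max_budget_metric{api_key_alias="<alias>",hashed_api_key="<key_id>"} 10.0
#   litellm_api_key_budget_remaining_hours_metric{api_key_alias="<alias>",hashed_api_key="<key_id>"} 167.5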


@pytest.mark.asyncio
async def test_key_budget_metrics():
    """
    Test key budget tracking metrics:
    1. Create a key with max_budget
    2. Make chat completion requests using OpenAI SDK with the key
    3. Verify budget decreases over time
    4. Verify request costs are being tracked correctly
    5. Verify prometheus metrics match /key/info spend data
    """
    async with aiohttp.ClientSession() as session:
        # Setup test key with unique alias
        unique_alias = f"budget_test_key_{uuid.uuid4()}"
        key_data = {
            "key_alias": unique_alias,
            "max_budget": 10,
            "budget_duration": "7d",
        }
        key = await create_test_key_with_budget(session, key_data)

        # Extract key_id from the key info
        key_info = await get_key_info(session, key)
        print("key_info", key_info)
        key_id = key_info["key"]
        print("key_id", key_id)

        # Initialize OpenAI client with the key
        client = AsyncOpenAI(base_url="http://0.0.0.0:4000", api_key=key)

        # Make initial request and check budget
        await client.chat.completions.create(
            model="fake-openai-endpoint",
            messages=[{"role": "user", "content": f"Hello {uuid.uuid4()}"}],
        )

        await asyncio.sleep(11)  # Wait for metrics to update

        # Get metrics after request
        metrics_after_first = await get_prometheus_metrics(session)
        print("metrics_after_first request", metrics_after_first)
        first_budget = extract_key_budget_metrics(metrics_after_first, key_id)

        print(f"Budget after 1 request: {first_budget}")
        assert (
            first_budget["remaining"] < 10.0
        ), "remaining budget should be less than 10.0 after first request"
        assert first_budget["total"] == 10.0, "Total budget metric is incorrect"
        print("first_budget['remaining_hours']", first_budget["remaining_hours"])
        # The budget reset time is now midnight, not exactly 7 days (168 hours) from creation
        # So we'll check if it's within a reasonable range (5-7 days)
        assert 120 <= first_budget["remaining_hours"] <= 168, "Budget remaining hours should be within a reasonable range (5-7 days)"

        # Get key info and verify spend matches prometheus metrics
        key_info = await get_key_info(session, key)
        print("key_info", key_info)
        _key_info_data = key_info["info"]

        # Calculate spend from prometheus (total - remaining)
        key_info_spend = float(_key_info_data["spend"])
        key_info_max_budget = float(_key_info_data["max_budget"])
        key_info_remaining_budget = key_info_max_budget - key_info_spend
        print("\n\n\n###### Final budget metrics ######\n\n\n")
        print("key_info_remaining_budget", key_info_remaining_budget)
        print("prometheus_remaining_budget", first_budget["remaining"])
        print(
            "diff between key_info_remaining_budget and prometheus_remaining_budget",
            key_info_remaining_budget - first_budget["remaining"],
        )

        # Verify spends match within a small delta (floating point comparison)
        assert (
            abs(key_info_remaining_budget - first_budget["remaining"]) <= 1e-6
        ), f"Spend mismatch: Key Info={key_info_remaining_budget}, Prometheus={first_budget['remaining']}"


@pytest.mark.asyncio
async def test_user_email_metrics():
    """
    Test user email tracking metrics:
    1. Create a user with user_email
    2. Make chat completion requests using OpenAI SDK with the user's key
    3. Verify user email is being tracked correctly in `litellm_user_email_metric`
    """
    async with aiohttp.ClientSession() as session:
        # Create a user with user_email
        user_email = f"test-{uuid.uuid4()}@example.com"
        user_data = {
            "user_email": user_email,
        }
        user_info = await create_test_user(session, user_data)
        key = user_info["key"]

        # Initialize OpenAI client with the user's key
        client = AsyncOpenAI(base_url="http://0.0.0.0:4000", api_key=key)

        # Make initial request
        await client.chat.completions.create(
            model="fake-openai-endpoint",
            messages=[{"role": "user", "content": f"Hello {uuid.uuid4()}"}],
        )

        await asyncio.sleep(11)  # Wait for metrics to update

        # Get metrics after request
        metrics_after_first = await get_prometheus_metrics(session)
        print("metrics_after_first request", metrics_after_first)
        assert (
            user_email in metrics_after_first
        ), "user_email should be tracked correctly"