Added LiteLLM to the stack

2025-08-18 09:40:50 +00:00
parent 0648c1968c
commit d220b04e32
2682 changed files with 533609 additions and 1 deletions
--- a/Development/litellm/tests/pass_through_tests/test_anthropic_passthrough.py
+++ b/Development/litellm/tests/pass_through_tests/test_anthropic_passthrough.py
@@ -0,0 +1,297 @@
+"""
+This test ensures that the proxy can passthrough anthropic requests
+"""
+
+import pytest
+import anthropic
+import aiohttp
+import asyncio
+import json
+
+
+@pytest.mark.asyncio
+@pytest.mark.flaky(retries=3, delay=2)
+async def test_anthropic_basic_completion_with_headers():
+    print("making basic completion request to anthropic passthrough with aiohttp")
+
+    headers = {
+        "Authorization": f"Bearer sk-1234",
+        "Content-Type": "application/json",
+        "Anthropic-Version": "2023-06-01",
+    }
+
+    payload = {
+        "model": "claude-3-5-sonnet-20241022",
+        "max_tokens": 10,
+        "messages": [{"role": "user", "content": "Say 'hello test' and nothing else"}],
+        "litellm_metadata": {
+            "tags": ["test-tag-1", "test-tag-2"],
+        },
+    }
+
+    async with aiohttp.ClientSession() as session:
+        async with session.post(
+            "http://0.0.0.0:4000/anthropic/v1/messages", json=payload, headers=headers
+        ) as response:
+            response_text = await response.text()
+            print(f"Response text: {response_text}")
+
+            response_json = await response.json()
+            response_headers = response.headers
+            print(
+                "non-streaming response",
+                json.dumps(response_json, indent=4, default=str),
+            )
+            reported_usage = response_json.get("usage", None)
+            anthropic_api_input_tokens = reported_usage.get("input_tokens", None)
+            anthropic_api_output_tokens = reported_usage.get("output_tokens", None)
+            litellm_call_id = response_headers.get("x-litellm-call-id")
+
+            print(f"LiteLLM Call ID: {litellm_call_id}")
+
+            # Wait for spend to be logged
+            await asyncio.sleep(15)
+
+            # Check spend logs for this specific request with retry logic
+            spend_data = None
+            max_retries = 2
+            for attempt in range(max_retries):
+                print(f"Attempt {attempt + 1}/{max_retries} to check spend logs")
+
+                async with session.get(
+                    f"http://0.0.0.0:4000/spend/logs?request_id={litellm_call_id}",
+                    headers={"Authorization": "Bearer sk-1234"},
+                ) as spend_response:
+                    print("text spend response")
+                    print(f"Spend response: {spend_response}")
+                    spend_data = await spend_response.json()
+                    print(f"Spend data: {spend_data}")
+
+                    # Check if spend data exists and has entries
+                    if spend_data and len(spend_data) > 0:
+                        print("Spend logs found!")
+                        break
+                    else:
+                        print("Spend logs not found yet...")
+                        if (
+                            attempt < max_retries - 1
+                        ):  # Don't wait after the last attempt
+                            print("Waiting 10 seconds before retry...")
+                            await asyncio.sleep(10)
+
+            assert spend_data is not None, "Should have spend data for the request"
+            assert len(spend_data) > 0, "Should have at least one spend log entry"
+
+            log_entry = spend_data[0]  # Get the first (and should be only) log entry
+
+            # Basic existence checks
+            assert spend_data is not None, "Should have spend data for the request"
+            assert isinstance(log_entry, dict), "Log entry should be a dictionary"
+
+            # Request metadata assertions
+            assert log_entry["request_id"] == litellm_call_id, "Request ID should match"
+            assert (
+                log_entry["call_type"] == "pass_through_endpoint"
+            ), "Call type should be pass_through_endpoint"
+            assert (
+                log_entry["api_base"] == "https://api.anthropic.com/v1/messages"
+            ), "API base should be Anthropic's endpoint"
+
+            # Token and spend assertions
+            assert log_entry["spend"] > 0, "Spend value should not be None"
+            assert isinstance(
+                log_entry["spend"], (int, float)
+            ), "Spend should be a number"
+            assert log_entry["total_tokens"] > 0, "Should have some tokens"
+            assert (
+                log_entry["prompt_tokens"] == anthropic_api_input_tokens
+            ), f"Should have prompt tokens matching anthropic api. Expected {anthropic_api_input_tokens} but got {log_entry['prompt_tokens']}"
+            assert (
+                log_entry["completion_tokens"] == anthropic_api_output_tokens
+            ), f"Should have completion tokens matching anthropic api. Expected {anthropic_api_output_tokens} but got {log_entry['completion_tokens']}"
+            assert (
+                log_entry["total_tokens"]
+                == log_entry["prompt_tokens"] + log_entry["completion_tokens"]
+            ), "Total tokens should equal prompt + completion"
+
+            # Time assertions
+            assert all(
+                key in log_entry
+                for key in ["startTime", "endTime", "completionStartTime"]
+            ), "Should have all time fields"
+            assert (
+                log_entry["startTime"] < log_entry["endTime"]
+            ), "Start time should be before end time"
+
+            # Metadata assertions
+            assert str(log_entry["cache_hit"]).lower() != "true", "Cache should be off"
+            assert log_entry["request_tags"] == [
+                "test-tag-1",
+                "test-tag-2",
+            ], "Tags should match input"
+            assert (
+                "user_api_key" in log_entry["metadata"]
+            ), "Should have user API key in metadata"
+
+            assert "claude" in log_entry["model"]
+            assert log_entry["custom_llm_provider"] == "anthropic"
+
+
+@pytest.mark.asyncio
+async def test_anthropic_streaming_with_headers():
+    print("making streaming request to anthropic passthrough with aiohttp")
+
+    headers = {
+        "Authorization": f"Bearer sk-1234",
+        "Content-Type": "application/json",
+        "Anthropic-Version": "2023-06-01",
+    }
+
+    payload = {
+        "model": "claude-3-5-sonnet-20241022",
+        "max_tokens": 10,
+        "messages": [
+            {"role": "user", "content": "Say 'hello stream test' and nothing else"}
+        ],
+        "stream": True,
+        "litellm_metadata": {
+            "tags": ["test-tag-stream-1", "test-tag-stream-2"],
+            "user": "test-user-1",
+        },
+    }
+
+    async with aiohttp.ClientSession() as session:
+        async with session.post(
+            "http://0.0.0.0:4000/anthropic/v1/messages", json=payload, headers=headers
+        ) as response:
+            print("response status")
+            print(response.status)
+            assert response.status == 200, "Response should be successful"
+            response_headers = response.headers
+            print(f"Response headers: {response_headers}")
+            litellm_call_id = response_headers.get("x-litellm-call-id")
+            print(f"LiteLLM Call ID: {litellm_call_id}")
+
+            collected_output = []
+            async for line in response.content:
+                if line:
+                    text = line.decode("utf-8").strip()
+                    if text.startswith("data: "):
+                        collected_output.append(text[6:])  # Remove 'data: ' prefix
+
+            print("Collected output:", "".join(collected_output))
+            anthropic_api_usage_chunks = []
+            for chunk in collected_output:
+                chunk_json = json.loads(chunk)
+                if "usage" in chunk_json:
+                    anthropic_api_usage_chunks.append(chunk_json["usage"])
+                elif "message" in chunk_json and "usage" in chunk_json["message"]:
+                    anthropic_api_usage_chunks.append(chunk_json["message"]["usage"])
+
+            print(
+                "anthropic_api_usage_chunks",
+                json.dumps(anthropic_api_usage_chunks, indent=4, default=str),
+            )
+
+            print("anthropic_api_usage_chunks: ", anthropic_api_usage_chunks)
+            # Get the most recent value of input tokens (iterate backwards to find last non-zero value)
+            anthropic_api_input_tokens = 0
+            for usage in reversed(anthropic_api_usage_chunks):
+                if usage.get("input_tokens", 0) > 0:
+                    anthropic_api_input_tokens = usage.get("input_tokens", 0)
+                    break
+            anthropic_api_output_tokens = 0
+            for usage in reversed(anthropic_api_usage_chunks):
+                if usage.get("output_tokens", 0) > 0:
+                    anthropic_api_output_tokens = usage.get("output_tokens", 0)
+                    break
+
+            print("anthropic_api_input_tokens", anthropic_api_input_tokens)
+            print("anthropic_api_output_tokens", anthropic_api_output_tokens)
+
+            # Wait for spend to be logged
+            await asyncio.sleep(20)
+
+            # Check spend logs for this specific request with retry logic
+            spend_data = None
+            max_retries = 2
+            for attempt in range(max_retries):
+                print(f"Attempt {attempt + 1}/{max_retries} to check spend logs")
+
+                async with session.get(
+                    f"http://0.0.0.0:4000/spend/logs?request_id={litellm_call_id}",
+                    headers={"Authorization": "Bearer sk-1234"},
+                ) as spend_response:
+                    spend_data = await spend_response.json()
+                    print(f"Spend data: {spend_data}")
+
+                    # Check if spend data exists and has entries
+                    if spend_data and len(spend_data) > 0:
+                        print("Spend logs found!")
+                        break
+                    else:
+                        print("Spend logs not found yet...")
+                        if (
+                            attempt < max_retries - 1
+                        ):  # Don't wait after the last attempt
+                            print("Waiting 10 seconds before retry...")
+                            await asyncio.sleep(10)
+
+            assert spend_data is not None, "Should have spend data for the request"
+            assert len(spend_data) > 0, "Should have at least one spend log entry"
+
+            log_entry = spend_data[0]  # Get the first (and should be only) log entry
+
+            # Basic existence checks
+            assert spend_data is not None, "Should have spend data for the request"
+            assert isinstance(log_entry, dict), "Log entry should be a dictionary"
+
+            # Request metadata assertions
+            assert log_entry["request_id"] == litellm_call_id, "Request ID should match"
+            assert (
+                log_entry["call_type"] == "pass_through_endpoint"
+            ), "Call type should be pass_through_endpoint"
+            # assert (
+            #     log_entry["api_base"] == "https://api.anthropic.com/v1/messages"
+            # ), "API base should be Anthropic's endpoint"
+
+            # Token and spend assertions
+            assert log_entry["spend"] > 0, "Spend value should not be None"
+            assert isinstance(
+                log_entry["spend"], (int, float)
+            ), "Spend should be a number"
+            assert log_entry["total_tokens"] > 0, "Should have some tokens"
+            assert (
+                log_entry["prompt_tokens"] == anthropic_api_input_tokens
+            ), f"Should have prompt tokens matching anthropic api. Expected {anthropic_api_input_tokens} but got {log_entry['prompt_tokens']}"
+            assert (
+                log_entry["completion_tokens"] == anthropic_api_output_tokens
+            ), f"Should have completion tokens matching anthropic api. Expected {anthropic_api_output_tokens} but got {log_entry['completion_tokens']}"
+            assert (
+                log_entry["total_tokens"]
+                == log_entry["prompt_tokens"] + log_entry["completion_tokens"]
+            ), "Total tokens should equal prompt + completion"
+
+            # Time assertions
+            assert all(
+                key in log_entry
+                for key in ["startTime", "endTime", "completionStartTime"]
+            ), "Should have all time fields"
+            assert (
+                log_entry["startTime"] < log_entry["endTime"]
+            ), "Start time should be before end time"
+
+            # Metadata assertions
+            assert str(log_entry["cache_hit"]).lower() != "true", "Cache should be off"
+            assert log_entry["request_tags"] == [
+                "test-tag-stream-1",
+                "test-tag-stream-2",
+            ], "Tags should match input"
+            assert (
+                "user_api_key" in log_entry["metadata"]
+            ), "Should have user API key in metadata"
+
+            assert "claude" in log_entry["model"]
+
+            assert log_entry["end_user"] == "test-user-1"
+            assert log_entry["custom_llm_provider"] == "anthropic"