Added LiteLLM to the stack
@@ -0,0 +1,536 @@
import httpx
import json
import pytest
import sys
from typing import Any, Dict, List
from unittest.mock import MagicMock, Mock, patch
import os
import uuid
import time
import base64

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from abc import ABC, abstractmethod

from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
from openai.types.responses.response_create_params import (
    ResponseInputParam,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler


def validate_responses_api_response(response, final_chunk: bool = False):
    """
    Validate that a response from litellm.responses() or litellm.aresponses()
    conforms to the expected ResponsesAPIResponse structure.

    Args:
        response: The response object to validate
        final_chunk: Whether this is a completed (final) response. When True,
            'usage' is validated as a ResponseAPIUsage and 'output' must contain
            at least one item; when False, 'usage' is expected to still be None.

    Raises:
        AssertionError: If the response doesn't match the expected structure
    """
    # Validate response structure
    print("response=", json.dumps(response, indent=4, default=str))
    assert isinstance(
        response, ResponsesAPIResponse
    ), "Response should be an instance of ResponsesAPIResponse"

    # Required fields
    assert "id" in response and isinstance(
        response["id"], str
    ), "Response should have a string 'id' field"
    assert "created_at" in response and isinstance(
        response["created_at"], int
    ), "Response should have an integer 'created_at' field"
    assert "output" in response and isinstance(
        response["output"], list
    ), "Response should have a list 'output' field"
    assert "parallel_tool_calls" in response and isinstance(
        response["parallel_tool_calls"], bool
    ), "Response should have a boolean 'parallel_tool_calls' field"

    # Optional fields with their expected types
    optional_fields = {
        "error": (dict, type(None)),  # error can be dict or None
        "incomplete_details": (IncompleteDetails, type(None)),
        "instructions": (str, type(None)),
        "metadata": dict,
        "model": str,
        "object": str,
        "temperature": (int, float, type(None)),
        "tool_choice": (dict, str),
        "tools": list,
        "top_p": (int, float, type(None)),
        "max_output_tokens": (int, type(None)),
        "previous_response_id": (str, type(None)),
        "reasoning": dict,
        "status": str,
        "text": ResponseTextConfig,
        "truncation": (str, type(None)),
        "usage": ResponseAPIUsage,
        "user": (str, type(None)),
        "store": (bool, type(None)),
    }
    if final_chunk is False:
        optional_fields["usage"] = type(None)

    for field, expected_type in optional_fields.items():
        if field in response:
            assert isinstance(
                response[field], expected_type
            ), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"

    # Check that the output has at least one item
    if final_chunk is True:
        assert (
            len(response["output"]) > 0
        ), "Response 'output' field should have at least one item"

    return True  # Return True if validation passes

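# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of using the validator above against a single non-streaming
# call; the model name is an assumption, not something this commit pins down.
def _example_validate_single_response():
    response = litellm.responses(
        model="openai/gpt-4o",  # illustrative model name
        input="Basic ping",
        max_output_tokens=20,
    )
    # For a completed (non-streaming) response, usage and output must be populated.
    validate_responses_api_response(response, final_chunk=True)
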
class BaseResponsesAPITest(ABC):
    """
    Abstract base test class that enforces a common suite of tests across all
    Responses API test classes. (An example provider subclass is sketched at the
    end of this file.)
    """

    @abstractmethod
    def get_base_completion_call_args(self) -> dict:
        """Must return the base completion call args"""
        pass

    def get_base_completion_reasoning_call_args(self) -> dict:
        """Must return the base completion reasoning call args"""
        return None

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_basic_openai_responses_api(self, sync_mode):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        try:
            if sync_mode:
                response = litellm.responses(
                    input="Basic ping", max_output_tokens=20,
                    **base_completion_call_args
                )
            else:
                response = await litellm.aresponses(
                    input="Basic ping", max_output_tokens=20,
                    **base_completion_call_args
                )
        except litellm.InternalServerError:
            pytest.skip("Skipping test due to litellm.InternalServerError")
        print("litellm response=", json.dumps(response, indent=4, default=str))

        # Use the helper function to validate the response
        validate_responses_api_response(response, final_chunk=True)

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    @pytest.mark.flaky(retries=3, delay=2)
    async def test_basic_openai_responses_api_streaming(self, sync_mode):
        litellm._turn_on_debug()
        base_completion_call_args = self.get_base_completion_call_args()
        collected_content_string = ""
        response_completed_event = None
        if sync_mode:
            response = litellm.responses(
                input="Basic ping",
                stream=True,
                **base_completion_call_args
            )
            for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if event.type == "response.output_text.delta":
                    collected_content_string += event.delta
                elif event.type == "response.completed":
                    response_completed_event = event
        else:
            response = await litellm.aresponses(
                input="Basic ping",
                stream=True,
                **base_completion_call_args
            )
            async for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if event.type == "response.output_text.delta":
                    collected_content_string += event.delta
                elif event.type == "response.completed":
                    response_completed_event = event

        # assert the delta chunks produced some content
        # (this content is typically rendered on chat UIs)
        assert len(collected_content_string) > 0

        # assert the response completed event is not None
        assert response_completed_event is not None

        # assert the response completed event has a response
        assert response_completed_event.response is not None

        # assert the response completed event includes the usage
        assert response_completed_event.response.usage is not None

        # basic sanity checks on the reported usage
        usage = response_completed_event.response.usage
        print("response_completed_event.response.usage=", usage)
        assert usage.input_tokens > 0 and usage.input_tokens < 100
        assert usage.output_tokens > 0 and usage.output_tokens < 2000
        assert usage.total_tokens > 0 and usage.total_tokens < 2000

        # total tokens should be the sum of input and output tokens
        assert usage.total_tokens == usage.input_tokens + usage.output_tokens

    @pytest.mark.parametrize("sync_mode", [False, True])
    @pytest.mark.asyncio
    async def test_basic_openai_responses_delete_endpoint(self, sync_mode):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        if sync_mode:
            response = litellm.responses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )

            # delete the response
            if isinstance(response, ResponsesAPIResponse):
                litellm.delete_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
            else:
                raise ValueError("response is not a ResponsesAPIResponse")
        else:
            response = await litellm.aresponses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )

            # async delete the response
            if isinstance(response, ResponsesAPIResponse):
                await litellm.adelete_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
            else:
                raise ValueError("response is not a ResponsesAPIResponse")

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.flaky(retries=3, delay=2)
    @pytest.mark.asyncio
    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode):
        # litellm._turn_on_debug()
        # litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        response_id = None
        if sync_mode:
            response = litellm.responses(
                input="Basic ping", max_output_tokens=20,
                stream=True,
                **base_completion_call_args
            )
            for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if "response" in event:
                    response_obj = event.get("response")
                    if response_obj is not None:
                        response_id = response_obj.get("id")
                        print("got response_id=", response_id)

            # delete the response
            assert response_id is not None
            litellm.delete_responses(
                response_id=response_id,
                **base_completion_call_args
            )
        else:
            response = await litellm.aresponses(
                input="Basic ping", max_output_tokens=20,
                stream=True,
                **base_completion_call_args
            )
            async for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if "response" in event:
                    response_obj = event.get("response")
                    if response_obj is not None:
                        response_id = response_obj.get("id")
                        print("got response_id=", response_id)

            # delete the response
            assert response_id is not None
            await litellm.adelete_responses(
                response_id=response_id,
                **base_completion_call_args
            )

    @pytest.mark.parametrize("sync_mode", [False, True])
    @pytest.mark.flaky(retries=3, delay=2)
    @pytest.mark.asyncio
    async def test_basic_openai_responses_get_endpoint(self, sync_mode):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        if sync_mode:
            response = litellm.responses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )

            # get the response
            if isinstance(response, ResponsesAPIResponse):
                result = litellm.get_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
                assert result is not None
                assert result.id == response.id
                assert result.output == response.output
            else:
                raise ValueError("response is not a ResponsesAPIResponse")
        else:
            response = await litellm.aresponses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )
            # async get the response
            if isinstance(response, ResponsesAPIResponse):
                result = await litellm.aget_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
                assert result is not None
                assert result.id == response.id
                assert result.output == response.output
            else:
                raise ValueError("response is not a ResponsesAPIResponse")

    @pytest.mark.asyncio
    @pytest.mark.flaky(retries=3, delay=2)
    async def test_basic_openai_list_input_items_endpoint(self):
        """Test that calls the OpenAI List Input Items endpoint"""
        litellm._turn_on_debug()

        response = await litellm.aresponses(
            model="gpt-4o",
            input="Tell me a three sentence bedtime story about a unicorn.",
        )
        print("Initial response=", json.dumps(response, indent=4, default=str))

        response_id = response.get("id")
        assert response_id is not None, "Response should have an ID"
        print(f"Got response_id: {response_id}")

        list_items_response = await litellm.alist_input_items(
            response_id=response_id,
            limit=20,
            order="desc",
        )
        print(
            "List items response=",
            json.dumps(list_items_response, indent=4, default=str),
        )

    @pytest.mark.asyncio
    async def test_multiturn_responses_api(self):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        response_1 = await litellm.aresponses(
            input="Basic ping", max_output_tokens=20, **base_completion_call_args
        )

        # follow up with a second request
        response_1_id = response_1.id
        response_2 = await litellm.aresponses(
            input="Basic ping",
            max_output_tokens=20,
            previous_response_id=response_1_id,
            **base_completion_call_args
        )

        # assert the responses are not None
        assert response_1 is not None
        assert response_2 is not None

    @pytest.mark.asyncio
    async def test_responses_api_with_tool_calls(self):
        """Test that calls the Responses API with tool calls, including a function call and its output"""
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()

        # Define the input with a message, a function call, and the function call output
        input_data: ResponseInputParam = [
            {
                "type": "message",
                "role": "user",
                "content": "How is the weather in São Paulo today?",
            },
            {
                "type": "function_call",
                "arguments": "{\"location\": \"São Paulo, Brazil\"}",
                "call_id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
                "name": "get_weather",
                "id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
                "status": "completed",
            },
            {
                "type": "function_call_output",
                "call_id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
                "output": "Rainy",
            },
        ]

        # Define the tools
        tools = [
            {
                "type": "function",
                "name": "get_weather",
                "description": "Get current temperature for a given location.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City and country e.g. Bogotá, Colombia",
                        }
                    },
                    "required": ["location"],
                    "additionalProperties": False,
                },
            }
        ]

        try:
            # Make the responses API call
            response = await litellm.aresponses(
                input=input_data,
                store=False,
                tools=tools,
                **base_completion_call_args
            )
        except litellm.InternalServerError:
            pytest.skip("Skipping test due to litellm.InternalServerError")

        print("litellm response=", json.dumps(response, indent=4, default=str))

        # Validate the response structure
        validate_responses_api_response(response, final_chunk=True)

        # Additional assertions specific to tool calls
        assert response is not None
        assert "output" in response
        assert len(response["output"]) > 0

    @pytest.mark.asyncio
    async def test_responses_api_multi_turn_with_reasoning_and_structured_output(self):
        """
        Test a multi-turn conversation with reasoning, structured output, and tool calls.

        This test validates:
        - First call: the model uses reasoning to process a question and makes a tool call
        - Tool call handling: the function call output is properly processed
        - Second call: the model produces structured output incorporating the tool results
        - Structured output: the response conforms to the defined Pydantic model schema
        """
        from pydantic import BaseModel

        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_reasoning_call_args()
        if base_completion_call_args is None:
            pytest.skip("Skipping test because no base completion reasoning call args are defined")

        # Define tools for the conversation
        tools = [{"type": "function", "name": "get_today"}]

        # Define the structured output schema
        class Output(BaseModel):
            today: str
            number_of_r: str

        # Initial conversation input
        input_messages = [
            {
                "role": "user",
                "content": "How many r in strrawberrry? While you're thinking, you should call tool get_today. Then you output the today and number of r",
            }
        ]

        # First call - should trigger reasoning and a tool call
        response = await litellm.aresponses(
            input=input_messages,
            tools=tools,
            reasoning={"effort": "low", "summary": "detailed"},
            text_format=Output,
            **base_completion_call_args
        )

        print("First call output:")
        print(json.dumps(response.output, indent=4, default=str))

        # Validate the first response structure
        validate_responses_api_response(response, final_chunk=True)
        assert response.output is not None
        assert len(response.output) > 0

        # Extend the input with the first response's output
        input_messages.extend(response.output)

        # Process any tool calls and add function outputs
        function_outputs = []
        for item in response.output:
            if hasattr(item, "type") and item.type in ["function_call", "custom_tool_call"]:
                if hasattr(item, "name") and item.name == "get_today":
                    function_outputs.append(
                        {
                            "type": "function_call_output",
                            "call_id": item.call_id,
                            "output": "2025-01-15",
                        }
                    )

        # Add the function outputs to the conversation
        input_messages.extend(function_outputs)

        print("Second call input:")
        print(json.dumps(input_messages, indent=4, default=str))

        # Second call - should produce structured output
        final_response = await litellm.aresponses(
            input=input_messages,
            tools=tools,
            reasoning={"effort": "low", "summary": "detailed"},
            text_format=Output,
            **base_completion_call_args
        )

        print("Second call output:")
        print(json.dumps(final_response.output, indent=4, default=str))

        # Validate the final response structure
        validate_responses_api_response(final_response, final_chunk=True)
        assert final_response.output is not None
        assert len(final_response.output) > 0
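

# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of how a provider test module is expected to plug into
# BaseResponsesAPITest: subclass it and return provider-specific call args. The
# model names below are assumptions for illustration; the real subclasses in this
# commit (Anthropic, Azure, Google AI Studio) follow the same pattern.
class ExampleOpenAIResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self) -> dict:
        # Every inherited test will splat these kwargs into litellm.responses()/aresponses().
        return {"model": "openai/gpt-4o"}  # illustrative model name

    def get_base_completion_reasoning_call_args(self) -> dict:
        # Optional: return reasoning-capable call args to enable the
        # multi-turn reasoning / structured-output test; returning None skips it.
        return {"model": "openai/o3-mini"}  # illustrative reasoning-capable model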
@@ -0,0 +1,63 @@
# conftest.py

import importlib
import os
import sys

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm


@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
    """
    This fixture reloads litellm before every test function, to speed up testing
    by preventing callbacks from being chained across tests.
    """
    curr_dir = os.getcwd()  # Get the current working directory
    sys.path.insert(
        0, os.path.abspath("../..")
    )  # Adds the project directory to the system path

    import litellm
    from litellm import Router

    importlib.reload(litellm)

    try:
        if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
            import litellm.proxy.proxy_server

            importlib.reload(litellm.proxy.proxy_server)
    except Exception as e:
        print(f"Error reloading litellm.proxy.proxy_server: {e}")

    import asyncio

    loop = asyncio.get_event_loop_policy().new_event_loop()
    asyncio.set_event_loop(loop)
    print(litellm)
    # from litellm import Router, completion, aembedding, acompletion, embedding
    yield

    # Teardown code (executes after the yield point)
    loop.close()  # Close the loop created earlier
    asyncio.set_event_loop(None)  # Remove the reference to the loop


def pytest_collection_modifyitems(config, items):
    # Separate tests in 'test_amazing_proxy_custom_logger.py' from the other tests
    custom_logger_tests = [
        item for item in items if "custom_logger" in item.parent.name
    ]
    other_tests = [item for item in items if "custom_logger" not in item.parent.name]

    # Sort tests based on their names
    custom_logger_tests.sort(key=lambda x: x.name)
    other_tests.sort(key=lambda x: x.name)

    # Reorder the items list so the custom logger tests run first
    items[:] = custom_logger_tests + other_tests
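

# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of the problem the autouse fixture above guards against:
# callbacks registered on the litellm module are module-level state, so without
# the importlib.reload() they accumulate ("chain") across test functions.
def _example_callback_accumulation():
    from litellm.integrations.custom_logger import CustomLogger

    class _NoopLogger(CustomLogger):
        pass

    # Module-level list shared by all tests in the same process.
    litellm.callbacks.append(_NoopLogger())
    print("registered callbacks:", len(litellm.callbacks))
    # importlib.reload(litellm) in setup_and_teardown resets this state before the next test.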
@@ -0,0 +1,139 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock
from litellm.responses.litellm_completion_transformation.handler import LiteLLMCompletionTransformationHandler
from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig
from litellm.types.utils import ModelResponse


sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest
from openai.types.responses.function_tool import FunctionTool


class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        # litellm._turn_on_debug()
        return {
            "model": "anthropic/claude-3-5-sonnet-latest",
        }

    async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_get_endpoint(self, sync_mode=False):
        pass


def test_multiturn_tool_calls():
    # Test a multi-turn, tool-calling Responses API flow for Anthropic
    litellm._turn_on_debug()
    shell_tool = dict(
        FunctionTool(
            type="function",
            name="shell",
            description="Runs a shell command, and returns its output.",
            parameters={
                "type": "object",
                "properties": {
                    "command": {"type": "array", "items": {"type": "string"}},
                    "workdir": {
                        "type": "string",
                        "description": "The working directory for the command.",
                    },
                },
                "required": ["command"],
            },
            strict=True,
        )
    )

    # Step 1: Initial request with the tool
    response = litellm.responses(
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "make a hello world html file"}
                ],
                "type": "message",
            }
        ],
        model="anthropic/claude-3-7-sonnet-latest",
        instructions="You are a helpful coding assistant.",
        tools=[shell_tool],
    )

    print("response=", response)

    # Step 2: Send the results of the tool call back to the model.
    # Get the response ID and tool call ID from the response.
    response_id = response.id
    tool_call_id = ""
    for item in response.output:
        if "type" in item and item["type"] == "function_call":
            tool_call_id = item["call_id"]
            break

    follow_up_response = litellm.responses(
        model="anthropic/claude-3-7-sonnet-latest",
        previous_response_id=response_id,
        input=[
            {
                "type": "function_call_output",
                "call_id": tool_call_id,
                "output": '{"output":"<html>\\n<head>\\n <title>Hello Page</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n <p>Welcome to this simple webpage!</p>\\n</body>\\n</html> > index.html\\n","metadata":{"exit_code":0,"duration_seconds":0}}',
            }
        ],
        tools=[shell_tool],
    )

    print("follow_up_response=", follow_up_response)


@pytest.mark.asyncio
async def test_async_response_api_handler_merges_trace_id_without_error():
    handler = LiteLLMCompletionTransformationHandler()

    async def fake_session_handler(previous_response_id, litellm_completion_request):
        litellm_completion_request["litellm_trace_id"] = "session-trace"
        return litellm_completion_request

    with patch.object(
        LiteLLMCompletionResponsesConfig,
        "async_responses_api_session_handler",
        side_effect=fake_session_handler,
    ):
        with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion:
            mock_acompletion.return_value = ModelResponse(
                id="id", created=0, model="test", object="chat.completion", choices=[]
            )
            await handler.async_response_api_handler(
                litellm_completion_request={"model": "test"},
                request_input="hi",
                responses_api_request={"previous_response_id": "123"},
                litellm_trace_id="original-trace",
            )
            # ensure acompletion was called once with the merged trace_id
            assert mock_acompletion.call_count == 1
            assert (
                mock_acompletion.call_args.kwargs["litellm_trace_id"] == "session-trace"
            )
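

# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of the generic loop that test_multiturn_tool_calls performs by
# hand: call the Responses API, execute any returned function_call items, and send
# the outputs back via previous_response_id. `run_command` is a hypothetical
# stand-in for a real tool implementation, not part of this commit.
def _example_tool_call_round_trip(model: str, user_text: str, tools: list, run_command):
    response = litellm.responses(input=user_text, model=model, tools=tools)
    tool_outputs = []
    for item in response.output:
        if "type" in item and item["type"] == "function_call":
            # Execute the tool and echo its result back to the model.
            tool_outputs.append(
                {
                    "type": "function_call_output",
                    "call_id": item["call_id"],
                    "output": run_command(item["arguments"]),
                }
            )
    if not tool_outputs:
        return response
    return litellm.responses(
        model=model,
        previous_response_id=response.id,
        input=tool_outputs,
        tools=tools,
    )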
@@ -0,0 +1,47 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock

sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest


class TestAzureResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        return {
            "model": "azure/computer-use-preview",
            "truncation": "auto",
            "api_base": os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
            "api_key": os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
            "api_version": os.getenv("AZURE_RESPONSES_OPENAI_API_VERSION"),
        }


@pytest.mark.asyncio
async def test_azure_responses_api_preview_api_version():
    """
    Ensure the new Azure "preview" api_version works.
    """
    litellm._turn_on_debug()
    response = await litellm.aresponses(
        model="azure/computer-use-preview",
        truncation="auto",
        api_version="preview",
        api_base=os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
        input="Hello, can you tell me a short joke?",
    )
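

# --- Illustrative example (not part of the original commit) -------------------
# A minimal pre-flight sketch for the Azure tests above, which assume these
# environment variables are set; the helper name is illustrative.
AZURE_RESPONSES_ENV_VARS = [
    "AZURE_RESPONSES_OPENAI_ENDPOINT",
    "AZURE_RESPONSES_OPENAI_API_KEY",
    "AZURE_RESPONSES_OPENAI_API_VERSION",
]


def _require_azure_responses_env():
    missing = [name for name in AZURE_RESPONSES_ENV_VARS if not os.getenv(name)]
    if missing:
        pytest.skip(f"Missing Azure Responses API env vars: {missing}")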
@@ -0,0 +1,239 @@
"""
Unit tests for BaseResponsesAPIStreamingIterator

Tests core functionality including:
1. Processing chunks and handling ResponseCompletedEvent
2. Ensuring _update_responses_api_response_id_with_model_id is called for the final chunk
3. Verifying the ID update is NOT called for non-final chunks (delta events)
4. Edge case handling for invalid JSON, empty chunks, and [DONE] markers

These tests ensure the streaming iterator correctly processes response chunks
and applies model ID updates only to completed responses, as required for proper
response tracking and logging.
"""

import json
import os
import sys
from datetime import datetime
from typing import Any, Dict, Optional
from unittest.mock import Mock, patch

import pytest

sys.path.insert(0, os.path.abspath("../.."))

from litellm.constants import STREAM_SSE_DONE_STRING
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponsesAPIStreamEvents,
    OutputTextDeltaEvent,
)


class TestBaseResponsesAPIStreamingIterator:
    """Test cases for BaseResponsesAPIStreamingIterator"""

    def test_process_chunk_with_response_completed_event(self):
        """
        Test that _process_chunk correctly processes a ResponseCompletedEvent
        and calls _update_responses_api_response_id_with_model_id for the final chunk.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create a mock ResponsesAPIResponse for the completed event
        mock_responses_api_response = Mock(spec=ResponsesAPIResponse)
        mock_responses_api_response.id = "original_response_id"

        # Create a mock ResponseCompletedEvent
        mock_completed_event = Mock(spec=ResponseCompletedEvent)
        mock_completed_event.type = ResponsesAPIStreamEvents.RESPONSE_COMPLETED
        mock_completed_event.response = mock_responses_api_response

        # Set up the mock transform method to return our completed event
        mock_config.transform_streaming_response.return_value = mock_completed_event

        # Mock the _update_responses_api_response_id_with_model_id method
        updated_response = Mock(spec=ResponsesAPIResponse)
        updated_response.id = "updated_response_id"

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
            litellm_metadata={"model_info": {"id": "model_123"}},
            custom_llm_provider="openai",
        )

        # Prepare test chunk data
        test_chunk_data = {
            "type": "response.completed",
            "response": {
                "id": "original_response_id",
                "output": [{"type": "message", "content": [{"text": "Hello World"}]}],
            },
        }

        with patch.object(
            ResponsesAPIRequestUtils,
            "_update_responses_api_response_id_with_model_id",
            return_value=updated_response,
        ) as mock_update_id:
            # Process the chunk
            result = iterator._process_chunk(json.dumps(test_chunk_data))

            # Assertions
            assert result is not None
            assert result.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED

            # Verify that _update_responses_api_response_id_with_model_id was called
            mock_update_id.assert_called_once_with(
                responses_api_response=mock_responses_api_response,
                litellm_metadata={"model_info": {"id": "model_123"}},
                custom_llm_provider="openai",
            )

            # Verify the completed response was stored
            assert iterator.completed_response == result

            # Verify the response was updated on the event
            assert result.response == updated_response

    def test_process_chunk_with_delta_event_no_id_update(self):
        """
        Test that _process_chunk correctly processes a delta event
        and does NOT call _update_responses_api_response_id_with_model_id.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create a mock OutputTextDeltaEvent (not a completed event)
        mock_delta_event = Mock(spec=OutputTextDeltaEvent)
        mock_delta_event.type = ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
        mock_delta_event.delta = "Hello"
        # Delta events don't have a response attribute
        if hasattr(mock_delta_event, "response"):
            delattr(mock_delta_event, "response")

        # Set up the mock transform method to return our delta event
        mock_config.transform_streaming_response.return_value = mock_delta_event

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
            litellm_metadata={"model_info": {"id": "model_123"}},
            custom_llm_provider="openai",
        )

        # Prepare test chunk data for a delta event
        test_chunk_data = {
            "type": "response.output_text.delta",
            "delta": "Hello",
            "item_id": "item_123",
            "output_index": 0,
            "content_index": 0,
        }

        with patch.object(
            ResponsesAPIRequestUtils,
            "_update_responses_api_response_id_with_model_id",
        ) as mock_update_id:
            # Process the chunk
            result = iterator._process_chunk(json.dumps(test_chunk_data))

            # Assertions
            assert result is not None
            assert result.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA

            # Verify that _update_responses_api_response_id_with_model_id was NOT called
            mock_update_id.assert_not_called()

            # Verify no completed response was stored (since this is not a completed event)
            assert iterator.completed_response is None

    def test_process_chunk_handles_invalid_json(self):
        """
        Test that _process_chunk gracefully handles invalid JSON.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
        )

        # Test with invalid JSON
        result = iterator._process_chunk("invalid json {")

        # Should return None for invalid JSON
        assert result is None
        assert iterator.completed_response is None

    def test_process_chunk_handles_done_marker(self):
        """
        Test that _process_chunk correctly handles the [DONE] marker.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
        )

        # Test with the [DONE] marker
        result = iterator._process_chunk(STREAM_SSE_DONE_STRING)

        # Should return None and set the finished flag
        assert result is None
        assert iterator.finished is True

    def test_process_chunk_handles_empty_chunk(self):
        """
        Test that _process_chunk correctly handles empty or None chunks.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
        )

        # Test with an empty chunk
        result = iterator._process_chunk("")
        assert result is None

        # Test with a None chunk
        result = iterator._process_chunk(None)
        assert result is None
@@ -0,0 +1,106 @@
import os
import sys
import pytest
from unittest.mock import patch, AsyncMock

sys.path.insert(0, os.path.abspath("../.."))
import litellm
import json
from base_responses_api import BaseResponsesAPITest


@pytest.mark.asyncio
async def test_basic_google_ai_studio_responses_api_with_tools():
    litellm._turn_on_debug()
    litellm.set_verbose = True
    request_model = "gemini/gemini-2.5-flash"
    response = await litellm.aresponses(
        model=request_model,
        input="what is the latest version of supabase python package and when was it released?",
        tools=[
            {
                "type": "web_search_preview",
                "search_context_size": "low",
            }
        ],
    )
    print("litellm response=", json.dumps(response, indent=4, default=str))


@pytest.mark.asyncio
async def test_mock_basic_google_ai_studio_responses_api_with_tools():
    """
    Ensure that this is the request litellm.acompletion receives when we pass web search options:

    litellm.acompletion(messages=[{'role': 'user', 'content': 'what is the latest version of supabase python package and when was it released?'}], model='gemini-2.5-flash', tools=[], web_search_options={'search_context_size': 'low', 'user_location': None})
    """
    # Mock the acompletion function
    litellm._turn_on_debug()
    mock_response = litellm.ModelResponse(
        id="test-id",
        created=1234567890,
        model="gemini/gemini-2.5-flash",
        object="chat.completion",
        choices=[
            litellm.utils.Choices(
                index=0,
                message=litellm.utils.Message(
                    role="assistant",
                    content="Test response",
                ),
                finish_reason="stop",
            )
        ],
    )

    with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion:
        mock_acompletion.return_value = mock_response

        request_model = "gemini/gemini-2.5-flash"
        await litellm.aresponses(
            model=request_model,
            input="what is the latest version of supabase python package and when was it released?",
            tools=[
                {
                    "type": "web_search_preview",
                    "search_context_size": "low",
                }
            ],
        )

        # Verify that acompletion was called
        assert mock_acompletion.called

        # Get the call arguments
        call_args, call_kwargs = mock_acompletion.call_args

        # Verify the expected parameters were passed
        print("call kwargs to litellm.completion=", json.dumps(call_kwargs, indent=4, default=str))
        assert "web_search_options" in call_kwargs
        assert call_kwargs["web_search_options"] is not None
        assert call_kwargs["web_search_options"]["search_context_size"] == "low"
        assert call_kwargs["web_search_options"]["user_location"] is None

        # Verify other expected parameters
        assert call_kwargs["model"] == "gemini-2.5-flash"
        assert len(call_kwargs["messages"]) == 1
        assert call_kwargs["messages"][0]["role"] == "user"
        assert call_kwargs["messages"][0]["content"] == "what is the latest version of supabase python package and when was it released?"
        assert call_kwargs["tools"] == []  # web search tools are converted to web_search_options, not kept as tools


class TestGoogleAIStudioResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        # litellm._turn_on_debug()
        return {
            "model": "gemini/gemini-2.5-flash-lite",
        }

    async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_get_endpoint(self, sync_mode=False):
        pass
File diff suppressed because it is too large