Added LiteLLM to the stack

This commit is contained in:
2025-08-18 09:40:50 +00:00
parent 0648c1968c
commit d220b04e32
2682 changed files with 533609 additions and 1 deletion


@@ -0,0 +1,536 @@
import httpx
import json
import pytest
import sys
from typing import Any, Dict, List, Optional
from unittest.mock import MagicMock, Mock, patch
import os
import uuid
import time
import base64
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from abc import ABC, abstractmethod
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponseTextConfig,
ResponseAPIUsage,
IncompleteDetails,
)
from openai.types.responses.response_create_params import (
ResponseInputParam,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
def validate_responses_api_response(response, final_chunk: bool = False):
"""
Validate that a response from litellm.responses() or litellm.aresponses()
conforms to the expected ResponsesAPIResponse structure.
    Args:
        response: The response object to validate
        final_chunk: Whether this is the final/completed response; if True, usage
            and a non-empty output list are required
    Raises:
        AssertionError: If the response doesn't match the expected structure
    """
# Validate response structure
print("response=", json.dumps(response, indent=4, default=str))
assert isinstance(
response, ResponsesAPIResponse
), "Response should be an instance of ResponsesAPIResponse"
# Required fields
assert "id" in response and isinstance(
response["id"], str
), "Response should have a string 'id' field"
assert "created_at" in response and isinstance(
response["created_at"], int
), "Response should have an integer 'created_at' field"
assert "output" in response and isinstance(
response["output"], list
), "Response should have a list 'output' field"
assert "parallel_tool_calls" in response and isinstance(
response["parallel_tool_calls"], bool
), "Response should have a boolean 'parallel_tool_calls' field"
# Optional fields with their expected types
optional_fields = {
"error": (dict, type(None)), # error can be dict or None
"incomplete_details": (IncompleteDetails, type(None)),
"instructions": (str, type(None)),
"metadata": dict,
"model": str,
"object": str,
"temperature": (int, float, type(None)),
"tool_choice": (dict, str),
"tools": list,
"top_p": (int, float, type(None)),
"max_output_tokens": (int, type(None)),
"previous_response_id": (str, type(None)),
"reasoning": dict,
"status": str,
"text": ResponseTextConfig,
"truncation": (str, type(None)),
"usage": ResponseAPIUsage,
"user": (str, type(None)),
"store": (bool, type(None)),
}
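    # Interim/streaming validations shouldn't expect usage yet; only the final
    # completed response carries a ResponseAPIUsage object.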
if final_chunk is False:
optional_fields["usage"] = type(None)
for field, expected_type in optional_fields.items():
if field in response:
assert isinstance(
response[field], expected_type
), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"
# Check if output has at least one item
if final_chunk is True:
assert (
len(response["output"]) > 0
), "Response 'output' field should have at least one item"
return True # Return True if validation passes
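# Example usage (sketch, not part of the test suite), assuming an OpenAI key is configured:
#
#   response = litellm.responses(model="gpt-4o", input="ping", max_output_tokens=20)
#   validate_responses_api_response(response, final_chunk=True)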
class BaseResponsesAPITest(ABC):
"""
    Abstract base test class that enforces a common set of tests across all provider test classes.
"""
@abstractmethod
def get_base_completion_call_args(self) -> dict:
"""Must return the base completion call args"""
pass
    def get_base_completion_reasoning_call_args(self) -> Optional[dict]:
        """
        Must return the base completion reasoning call args, or None if the provider
        has no reasoning model to test (the reasoning test is then skipped).
        """
        return None
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_basic_openai_responses_api(self, sync_mode):
litellm._turn_on_debug()
litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
try:
if sync_mode:
response = litellm.responses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
else:
response = await litellm.aresponses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
except litellm.InternalServerError:
pytest.skip("Skipping test due to litellm.InternalServerError")
print("litellm response=", json.dumps(response, indent=4, default=str))
# Use the helper function to validate the response
validate_responses_api_response(response, final_chunk=True)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=2)
async def test_basic_openai_responses_api_streaming(self, sync_mode):
litellm._turn_on_debug()
base_completion_call_args = self.get_base_completion_call_args()
collected_content_string = ""
response_completed_event = None
if sync_mode:
response = litellm.responses(
input="Basic ping",
stream=True,
**base_completion_call_args
)
for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
if event.type == "response.output_text.delta":
collected_content_string += event.delta
elif event.type == "response.completed":
response_completed_event = event
else:
response = await litellm.aresponses(
input="Basic ping",
stream=True,
**base_completion_call_args
)
async for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
if event.type == "response.output_text.delta":
collected_content_string += event.delta
elif event.type == "response.completed":
response_completed_event = event
        # the streamed delta chunks should have produced some content
        # (this is what chat UIs typically render)
assert len(collected_content_string) > 0
# assert the response completed event is not None
assert response_completed_event is not None
# assert the response completed event has a response
assert response_completed_event.response is not None
# assert the response completed event includes the usage
assert response_completed_event.response.usage is not None
# basic test assert the usage seems reasonable
print("response_completed_event.response.usage=", response_completed_event.response.usage)
assert response_completed_event.response.usage.input_tokens > 0 and response_completed_event.response.usage.input_tokens < 100
assert response_completed_event.response.usage.output_tokens > 0 and response_completed_event.response.usage.output_tokens < 2000
assert response_completed_event.response.usage.total_tokens > 0 and response_completed_event.response.usage.total_tokens < 2000
# total tokens should be the sum of input and output tokens
assert response_completed_event.response.usage.total_tokens == response_completed_event.response.usage.input_tokens + response_completed_event.response.usage.output_tokens
@pytest.mark.parametrize("sync_mode", [False, True])
@pytest.mark.asyncio
async def test_basic_openai_responses_delete_endpoint(self, sync_mode):
litellm._turn_on_debug()
litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
if sync_mode:
response = litellm.responses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
# delete the response
if isinstance(response, ResponsesAPIResponse):
litellm.delete_responses(
response_id=response.id,
**base_completion_call_args
)
else:
raise ValueError("response is not a ResponsesAPIResponse")
else:
response = await litellm.aresponses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
# async delete the response
if isinstance(response, ResponsesAPIResponse):
await litellm.adelete_responses(
response_id=response.id,
**base_completion_call_args
)
else:
raise ValueError("response is not a ResponsesAPIResponse")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.flaky(retries=3, delay=2)
@pytest.mark.asyncio
async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode):
#litellm._turn_on_debug()
#litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
response_id = None
if sync_mode:
response_id = None
response = litellm.responses(
input="Basic ping", max_output_tokens=20,
stream=True,
**base_completion_call_args
)
for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
if "response" in event:
response_obj = event.get("response")
if response_obj is not None:
response_id = response_obj.get("id")
print("got response_id=", response_id)
# delete the response
assert response_id is not None
litellm.delete_responses(
response_id=response_id,
**base_completion_call_args
)
else:
response = await litellm.aresponses(
input="Basic ping", max_output_tokens=20,
stream=True,
**base_completion_call_args
)
async for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
if "response" in event:
response_obj = event.get("response")
if response_obj is not None:
response_id = response_obj.get("id")
print("got response_id=", response_id)
# delete the response
assert response_id is not None
await litellm.adelete_responses(
response_id=response_id,
**base_completion_call_args
)
@pytest.mark.parametrize("sync_mode", [False, True])
@pytest.mark.flaky(retries=3, delay=2)
@pytest.mark.asyncio
async def test_basic_openai_responses_get_endpoint(self, sync_mode):
litellm._turn_on_debug()
litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
if sync_mode:
response = litellm.responses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
# get the response
if isinstance(response, ResponsesAPIResponse):
result = litellm.get_responses(
response_id=response.id,
**base_completion_call_args
)
assert result is not None
assert result.id == response.id
assert result.output == response.output
else:
raise ValueError("response is not a ResponsesAPIResponse")
else:
response = await litellm.aresponses(
input="Basic ping", max_output_tokens=20,
**base_completion_call_args
)
# async get the response
if isinstance(response, ResponsesAPIResponse):
result = await litellm.aget_responses(
response_id=response.id,
**base_completion_call_args
)
assert result is not None
assert result.id == response.id
assert result.output == response.output
else:
raise ValueError("response is not a ResponsesAPIResponse")
@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=2)
async def test_basic_openai_list_input_items_endpoint(self):
"""Test that calls the OpenAI List Input Items endpoint"""
litellm._turn_on_debug()
response = await litellm.aresponses(
model="gpt-4o",
input="Tell me a three sentence bedtime story about a unicorn.",
)
print("Initial response=", json.dumps(response, indent=4, default=str))
response_id = response.get("id")
assert response_id is not None, "Response should have an ID"
print(f"Got response_id: {response_id}")
list_items_response = await litellm.alist_input_items(
response_id=response_id,
limit=20,
order="desc",
)
print(
"List items response=",
json.dumps(list_items_response, indent=4, default=str),
)
@pytest.mark.asyncio
async def test_multiturn_responses_api(self):
litellm._turn_on_debug()
litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
response_1 = await litellm.aresponses(
input="Basic ping", max_output_tokens=20, **base_completion_call_args
)
# follow up with a second request
response_1_id = response_1.id
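        # previous_response_id asks the provider to continue the first response's
        # conversation state rather than start a new thread.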
response_2 = await litellm.aresponses(
input="Basic ping",
max_output_tokens=20,
previous_response_id=response_1_id,
**base_completion_call_args
)
# assert the response is not None
assert response_1 is not None
assert response_2 is not None
@pytest.mark.asyncio
async def test_responses_api_with_tool_calls(self):
"""Test that calls the Responses API with tool calls including function call and output"""
litellm._turn_on_debug()
litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_call_args()
# Define the input with message, function call, and function call output
input_data: ResponseInputParam = [
{
"type": "message",
"role": "user",
"content": "How is the weather in São Paulo today ?"
},
{
"type": "function_call",
"arguments": "{\"location\": \"São Paulo, Brazil\"}",
"call_id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
"name": "get_weather",
"id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
"status": "completed"
},
{
"type": "function_call_output",
"call_id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
"output": "Rainy"
}
]
# Define the tools
tools = [
{
"type": "function",
"name": "get_weather",
"description": "Get current temperature for a given location.",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City and country e.g. Bogotá, Colombia"
}
},
"required": ["location"],
"additionalProperties": False
}
}
]
try:
# Make the responses API call
response = await litellm.aresponses(
input=input_data,
store=False,
tools=tools,
**base_completion_call_args
)
except litellm.InternalServerError:
pytest.skip("Skipping test due to litellm.InternalServerError")
print("litellm response=", json.dumps(response, indent=4, default=str))
# Validate the response structure
validate_responses_api_response(response, final_chunk=True)
# Additional assertions specific to tool calls
assert response is not None
assert "output" in response
assert len(response["output"]) > 0
@pytest.mark.asyncio
async def test_responses_api_multi_turn_with_reasoning_and_structured_output(self):
"""
Test multi-turn conversation with reasoning, structured output, and tool calls.
This test validates:
- First call: Model uses reasoning to process a question and makes a tool call
- Tool call handling: Function call output is properly processed
- Second call: Model produces structured output incorporating tool results
- Structured output: Response conforms to defined Pydantic model schema
"""
from pydantic import BaseModel
litellm._turn_on_debug()
litellm.set_verbose = True
base_completion_call_args = self.get_base_completion_reasoning_call_args()
if base_completion_call_args is None:
pytest.skip("Skipping test due to no base completion reasoning call args")
# Define tools for the conversation
tools = [{"type": "function", "name": "get_today"}]
# Define structured output schema
class Output(BaseModel):
today: str
number_of_r: str
# Initial conversation input
input_messages = [
{
"role": "user",
"content": "How many r in strrawberrry? While you're thinking, you should call tool get_today. Then you output the today and number of r",
}
]
# First call - should trigger reasoning and tool call
response = await litellm.aresponses(
input=input_messages,
tools=tools,
reasoning={"effort": "low", "summary": "detailed"},
text_format=Output,
**base_completion_call_args
)
print("First call output:")
print(json.dumps(response.output, indent=4, default=str))
# Validate first response structure
validate_responses_api_response(response, final_chunk=True)
assert response.output is not None
assert len(response.output) > 0
# Extend input with first response output
input_messages.extend(response.output)
# Process any tool calls and add function outputs
function_outputs = []
for item in response.output:
if hasattr(item, 'type') and item.type in ["function_call", "custom_tool_call"]:
if hasattr(item, 'name') and item.name == "get_today":
function_outputs.append({
"type": "function_call_output",
"call_id": item.call_id,
"output": "2025-01-15"
})
# Add function outputs to conversation
input_messages.extend(function_outputs)
print("Second call input:")
print(json.dumps(input_messages, indent=4, default=str))
# Second call - should produce structured output
final_response = await litellm.aresponses(
input=input_messages,
tools=tools,
reasoning={"effort": "low", "summary": "detailed"},
text_format=Output,
**base_completion_call_args
)
print("Second call output:")
print(json.dumps(final_response.output, indent=4, default=str))
# Validate final response structure
validate_responses_api_response(final_response, final_chunk=True)
assert final_response.output is not None
assert len(final_response.output) > 0


@@ -0,0 +1,63 @@
# conftest.py
import importlib
import os
import sys
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
"""
    This fixture reloads litellm before every test function, which speeds up testing
    by preventing callbacks from being chained across tests.
"""
curr_dir = os.getcwd() # Get the current working directory
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the project directory to the system path
import litellm
from litellm import Router
importlib.reload(litellm)
try:
if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
import litellm.proxy.proxy_server
importlib.reload(litellm.proxy.proxy_server)
except Exception as e:
print(f"Error reloading litellm.proxy.proxy_server: {e}")
import asyncio
loop = asyncio.get_event_loop_policy().new_event_loop()
asyncio.set_event_loop(loop)
print(litellm)
# from litellm import Router, completion, aembedding, acompletion, embedding
yield
# Teardown code (executes after the yield point)
loop.close() # Close the loop created earlier
asyncio.set_event_loop(None) # Remove the reference to the loop
def pytest_collection_modifyitems(config, items):
# Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
custom_logger_tests = [
item for item in items if "custom_logger" in item.parent.name
]
other_tests = [item for item in items if "custom_logger" not in item.parent.name]
# Sort tests based on their names
custom_logger_tests.sort(key=lambda x: x.name)
other_tests.sort(key=lambda x: x.name)
# Reorder the items list
items[:] = custom_logger_tests + other_tests


@@ -0,0 +1,139 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock
from litellm.responses.litellm_completion_transformation.handler import LiteLLMCompletionTransformationHandler
from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig
from litellm.types.utils import ModelResponse
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponseTextConfig,
ResponseAPIUsage,
IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest
from openai.types.responses.function_tool import FunctionTool
class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
def get_base_completion_call_args(self):
#litellm._turn_on_debug()
return {
"model": "anthropic/claude-3-5-sonnet-latest",
}
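    # The delete/get response endpoints aren't exercised for Anthropic here, so the
    # inherited endpoint tests are overridden as no-ops.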
async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
pass
async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
pass
async def test_basic_openai_responses_get_endpoint(self, sync_mode=False):
pass
def test_multiturn_tool_calls():
    # Test a multi-turn tool-calling flow with tools for Anthropic via the Responses API (non-streaming)
litellm._turn_on_debug()
shell_tool = dict(FunctionTool(
type="function",
name="shell",
description="Runs a shell command, and returns its output.",
parameters={
"type": "object",
"properties": {
"command": {"type": "array", "items": {"type": "string"}},
"workdir": {"type": "string", "description": "The working directory for the command."}
},
"required": ["command"]
},
strict=True
))
# Step 1: Initial request with the tool
response = litellm.responses(
input=[{
'role': 'user',
'content': [
{'type': 'input_text', 'text': 'make a hello world html file'}
],
'type': 'message'
}],
model='anthropic/claude-3-7-sonnet-latest',
instructions='You are a helpful coding assistant.',
tools=[shell_tool]
)
print("response=", response)
# Step 2: Send the results of the tool call back to the model
# Get the response ID and tool call ID from the response
response_id = response.id
tool_call_id = ""
for item in response.output:
if 'type' in item and item['type'] == 'function_call':
tool_call_id = item['call_id']
break
    # Send the tool call's output back, linking to the first response via previous_response_id
follow_up_response = litellm.responses(
model='anthropic/claude-3-7-sonnet-latest',
previous_response_id=response_id,
input=[{
'type': 'function_call_output',
'call_id': tool_call_id,
'output': '{"output":"<html>\\n<head>\\n <title>Hello Page</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n <p>Welcome to this simple webpage!</p>\\n</body>\\n</html> > index.html\\n","metadata":{"exit_code":0,"duration_seconds":0}}'
}],
tools=[shell_tool]
)
print("follow_up_response=", follow_up_response)
@pytest.mark.asyncio
async def test_async_response_api_handler_merges_trace_id_without_error():
handler = LiteLLMCompletionTransformationHandler()
async def fake_session_handler(previous_response_id, litellm_completion_request):
litellm_completion_request["litellm_trace_id"] = "session-trace"
return litellm_completion_request
with patch.object(
LiteLLMCompletionResponsesConfig,
"async_responses_api_session_handler",
side_effect=fake_session_handler,
):
with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion:
mock_acompletion.return_value = ModelResponse(
id="id", created=0, model="test", object="chat.completion", choices=[]
)
await handler.async_response_api_handler(
litellm_completion_request={"model": "test"},
request_input="hi",
responses_api_request={"previous_response_id": "123"},
litellm_trace_id="original-trace",
)
# ensure acompletion called once with merged trace_id
assert mock_acompletion.call_count == 1
assert (
mock_acompletion.call_args.kwargs["litellm_trace_id"] == "session-trace"
)


@@ -0,0 +1,47 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponseTextConfig,
ResponseAPIUsage,
IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest
class TestAzureResponsesAPITest(BaseResponsesAPITest):
def get_base_completion_call_args(self):
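        # Assumes an Azure OpenAI resource with a computer-use-preview deployment;
        # credentials come from the AZURE_RESPONSES_OPENAI_* environment variables.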
return {
"model": "azure/computer-use-preview",
"truncation": "auto",
"api_base": os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
"api_key": os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
"api_version": os.getenv("AZURE_RESPONSES_OPENAI_API_VERSION"),
}
@pytest.mark.asyncio
async def test_azure_responses_api_preview_api_version():
"""
    Ensure the new Azure 'preview' api_version is working
"""
litellm._turn_on_debug()
response = await litellm.aresponses(
model="azure/computer-use-preview",
truncation="auto",
api_version="preview",
api_base=os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
api_key=os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
input="Hello, can you tell me a short joke?",
)
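    # No explicit assertions: the call completing without raising is the check here.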


@@ -0,0 +1,239 @@
"""
Unit tests for BaseResponsesAPIStreamingIterator
Tests core functionality including:
1. Processing chunks and handling ResponseCompletedEvent
2. Ensuring _update_responses_api_response_id_with_model_id is called for final chunk
3. Verifying ID update is NOT called for non-final chunks (delta events)
4. Edge case handling for invalid JSON, empty chunks, and [DONE] markers
These tests ensure the streaming iterator correctly processes response chunks
and applies model ID updates only to completed responses, as required for proper
response tracking and logging.
"""
import json
import os
import sys
from datetime import datetime
from typing import Any, Dict, Optional
from unittest.mock import Mock, patch
import pytest
sys.path.insert(0, os.path.abspath("../.."))
from litellm.constants import STREAM_SSE_DONE_STRING
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponsesAPIStreamEvents,
OutputTextDeltaEvent
)
class TestBaseResponsesAPIStreamingIterator:
"""Test cases for BaseResponsesAPIStreamingIterator"""
def test_process_chunk_with_response_completed_event(self):
"""
Test that _process_chunk correctly processes a ResponseCompletedEvent
and calls _update_responses_api_response_id_with_model_id for the final chunk.
"""
# Mock dependencies
mock_response = Mock()
mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
mock_config = Mock(spec=BaseResponsesAPIConfig)
# Create a mock ResponsesAPIResponse for the completed event
mock_responses_api_response = Mock(spec=ResponsesAPIResponse)
mock_responses_api_response.id = "original_response_id"
# Create a mock ResponseCompletedEvent
mock_completed_event = Mock(spec=ResponseCompletedEvent)
mock_completed_event.type = ResponsesAPIStreamEvents.RESPONSE_COMPLETED
mock_completed_event.response = mock_responses_api_response
# Set up the mock transform method to return our completed event
mock_config.transform_streaming_response.return_value = mock_completed_event
# Mock the _update_responses_api_response_id_with_model_id method
updated_response = Mock(spec=ResponsesAPIResponse)
updated_response.id = "updated_response_id"
# Create the iterator instance
iterator = BaseResponsesAPIStreamingIterator(
response=mock_response,
model="gpt-4",
responses_api_provider_config=mock_config,
logging_obj=mock_logging_obj,
litellm_metadata={"model_info": {"id": "model_123"}},
custom_llm_provider="openai"
)
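        # litellm_metadata supplies a model_info id; for the completed event the response
        # is expected to be rewritten via _update_responses_api_response_id_with_model_id.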
# Prepare test chunk data
test_chunk_data = {
"type": "response.completed",
"response": {
"id": "original_response_id",
"output": [{"type": "message", "content": [{"text": "Hello World"}]}]
}
}
with patch.object(
ResponsesAPIRequestUtils,
'_update_responses_api_response_id_with_model_id',
return_value=updated_response
) as mock_update_id:
# Process the chunk
result = iterator._process_chunk(json.dumps(test_chunk_data))
# Assertions
assert result is not None
assert result.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED
# Verify that _update_responses_api_response_id_with_model_id was called
mock_update_id.assert_called_once_with(
responses_api_response=mock_responses_api_response,
litellm_metadata={"model_info": {"id": "model_123"}},
custom_llm_provider="openai"
)
# Verify the completed response was stored
assert iterator.completed_response == result
# Verify the response was updated on the event
assert result.response == updated_response
def test_process_chunk_with_delta_event_no_id_update(self):
"""
Test that _process_chunk correctly processes a delta event
and does NOT call _update_responses_api_response_id_with_model_id.
"""
# Mock dependencies
mock_response = Mock()
mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
mock_config = Mock(spec=BaseResponsesAPIConfig)
# Create a mock OutputTextDeltaEvent (not a completed event)
mock_delta_event = Mock(spec=OutputTextDeltaEvent)
mock_delta_event.type = ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
mock_delta_event.delta = "Hello"
        # Delta events don't have a response attribute
        if hasattr(mock_delta_event, "response"):
            delattr(mock_delta_event, "response")
# Set up the mock transform method to return our delta event
mock_config.transform_streaming_response.return_value = mock_delta_event
# Create the iterator instance
iterator = BaseResponsesAPIStreamingIterator(
response=mock_response,
model="gpt-4",
responses_api_provider_config=mock_config,
logging_obj=mock_logging_obj,
litellm_metadata={"model_info": {"id": "model_123"}},
custom_llm_provider="openai"
)
# Prepare test chunk data for a delta event
test_chunk_data = {
"type": "response.output_text.delta",
"delta": "Hello",
"item_id": "item_123",
"output_index": 0,
"content_index": 0
}
with patch.object(
ResponsesAPIRequestUtils,
'_update_responses_api_response_id_with_model_id'
) as mock_update_id:
# Process the chunk
result = iterator._process_chunk(json.dumps(test_chunk_data))
# Assertions
assert result is not None
assert result.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
# Verify that _update_responses_api_response_id_with_model_id was NOT called
mock_update_id.assert_not_called()
# Verify no completed response was stored (since this is not a completed event)
assert iterator.completed_response is None
def test_process_chunk_handles_invalid_json(self):
"""
Test that _process_chunk gracefully handles invalid JSON.
"""
# Mock dependencies
mock_response = Mock()
mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
mock_config = Mock(spec=BaseResponsesAPIConfig)
# Create the iterator instance
iterator = BaseResponsesAPIStreamingIterator(
response=mock_response,
model="gpt-4",
responses_api_provider_config=mock_config,
logging_obj=mock_logging_obj
)
# Test with invalid JSON
result = iterator._process_chunk("invalid json {")
# Should return None for invalid JSON
assert result is None
assert iterator.completed_response is None
def test_process_chunk_handles_done_marker(self):
"""
Test that _process_chunk correctly handles the [DONE] marker.
"""
# Mock dependencies
mock_response = Mock()
mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
mock_config = Mock(spec=BaseResponsesAPIConfig)
# Create the iterator instance
iterator = BaseResponsesAPIStreamingIterator(
response=mock_response,
model="gpt-4",
responses_api_provider_config=mock_config,
logging_obj=mock_logging_obj
)
# Test with [DONE] marker
result = iterator._process_chunk(STREAM_SSE_DONE_STRING)
# Should return None and set finished flag
assert result is None
assert iterator.finished is True
def test_process_chunk_handles_empty_chunk(self):
"""
Test that _process_chunk correctly handles empty or None chunks.
"""
# Mock dependencies
mock_response = Mock()
mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
mock_config = Mock(spec=BaseResponsesAPIConfig)
# Create the iterator instance
iterator = BaseResponsesAPIStreamingIterator(
response=mock_response,
model="gpt-4",
responses_api_provider_config=mock_config,
logging_obj=mock_logging_obj
)
# Test with empty chunk
result = iterator._process_chunk("")
assert result is None
# Test with None chunk
result = iterator._process_chunk(None)
assert result is None


@@ -0,0 +1,106 @@
import os
import sys
import pytest
from unittest.mock import patch, AsyncMock
sys.path.insert(0, os.path.abspath("../.."))
import litellm
import json
from base_responses_api import BaseResponsesAPITest
@pytest.mark.asyncio
async def test_basic_google_ai_studio_responses_api_with_tools():
litellm._turn_on_debug()
litellm.set_verbose = True
request_model = "gemini/gemini-2.5-flash"
response = await litellm.aresponses(
model=request_model,
input="what is the latest version of supabase python package and when was it released?",
tools=[
{
"type": "web_search_preview",
"search_context_size": "low"
}
]
)
print("litellm response=", json.dumps(response, indent=4, default=str))
@pytest.mark.asyncio
async def test_mock_basic_google_ai_studio_responses_api_with_tools():
"""
- Ensure that this is the request that litellm.completion gets when we pass web search options
litellm.acompletion(messages=[{'role': 'user', 'content': 'what is the latest version of supabase python package and when was it released?'}], model='gemini-2.5-flash', tools=[], web_search_options={'search_context_size': 'low', 'user_location': None})
"""
# Mock the acompletion function
litellm._turn_on_debug()
mock_response = litellm.ModelResponse(
id="test-id",
created=1234567890,
model="gemini/gemini-2.5-flash",
object="chat.completion",
choices=[
litellm.utils.Choices(
index=0,
message=litellm.utils.Message(
role="assistant",
content="Test response"
),
finish_reason="stop"
)
]
)
with patch('litellm.acompletion', new_callable=AsyncMock) as mock_acompletion:
mock_acompletion.return_value = mock_response
request_model = "gemini/gemini-2.5-flash"
await litellm.aresponses(
model=request_model,
input="what is the latest version of supabase python package and when was it released?",
tools=[
{
"type": "web_search_preview",
"search_context_size": "low"
}
]
)
# Verify that acompletion was called
assert mock_acompletion.called
# Get the call arguments
call_args, call_kwargs = mock_acompletion.call_args
# Verify the expected parameters were passed
print("call kwargs to litellm.completion=", json.dumps(call_kwargs, indent=4, default=str))
assert "web_search_options" in call_kwargs
assert call_kwargs["web_search_options"] is not None
assert call_kwargs["web_search_options"]["search_context_size"] == "low"
assert call_kwargs["web_search_options"]["user_location"] is None
# Verify other expected parameters
assert call_kwargs["model"] == "gemini-2.5-flash"
assert len(call_kwargs["messages"]) == 1
assert call_kwargs["messages"][0]["role"] == "user"
assert call_kwargs["messages"][0]["content"] == "what is the latest version of supabase python package and when was it released?"
assert call_kwargs["tools"] == [] # web search tools are converted to web_search_options, not kept as tools
class TestGoogleAIStudioResponsesAPITest(BaseResponsesAPITest):
def get_base_completion_call_args(self):
#litellm._turn_on_debug()
return {
"model": "gemini/gemini-2.5-flash-lite"
}
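    # The delete/get response endpoints aren't exercised for Google AI Studio here, so
    # the inherited endpoint tests are overridden as no-ops.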
async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
pass
async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
pass
async def test_basic_openai_responses_get_endpoint(self, sync_mode=False):
pass

File diff suppressed because it is too large