Added LiteLLM to the stack
@@ -0,0 +1,536 @@
import httpx
import json
import pytest
import sys
from typing import Any, Dict, List
from unittest.mock import MagicMock, Mock, patch
import os
import uuid
import time
import base64

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from abc import ABC, abstractmethod

from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
from openai.types.responses.response_create_params import (
    ResponseInputParam,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler


def validate_responses_api_response(response, final_chunk: bool = False):
    """
    Validate that a response from litellm.responses() or litellm.aresponses()
    conforms to the expected ResponsesAPIResponse structure.

    Args:
        response: The response object to validate
        final_chunk: Whether this is a completed (final) response. When True,
            'usage' is validated as a ResponseAPIUsage and 'output' must contain
            at least one item; when False, 'usage' is expected to still be None.

    Raises:
        AssertionError: If the response doesn't match the expected structure
    """
    # Validate response structure
    print("response=", json.dumps(response, indent=4, default=str))
    assert isinstance(
        response, ResponsesAPIResponse
    ), "Response should be an instance of ResponsesAPIResponse"

    # Required fields
    assert "id" in response and isinstance(
        response["id"], str
    ), "Response should have a string 'id' field"
    assert "created_at" in response and isinstance(
        response["created_at"], int
    ), "Response should have an integer 'created_at' field"
    assert "output" in response and isinstance(
        response["output"], list
    ), "Response should have a list 'output' field"
    assert "parallel_tool_calls" in response and isinstance(
        response["parallel_tool_calls"], bool
    ), "Response should have a boolean 'parallel_tool_calls' field"

    # Optional fields with their expected types
    optional_fields = {
        "error": (dict, type(None)),  # error can be dict or None
        "incomplete_details": (IncompleteDetails, type(None)),
        "instructions": (str, type(None)),
        "metadata": dict,
        "model": str,
        "object": str,
        "temperature": (int, float, type(None)),
        "tool_choice": (dict, str),
        "tools": list,
        "top_p": (int, float, type(None)),
        "max_output_tokens": (int, type(None)),
        "previous_response_id": (str, type(None)),
        "reasoning": dict,
        "status": str,
        "text": ResponseTextConfig,
        "truncation": (str, type(None)),
        "usage": ResponseAPIUsage,
        "user": (str, type(None)),
        "store": (bool, type(None)),
    }
    if final_chunk is False:
        optional_fields["usage"] = type(None)

    for field, expected_type in optional_fields.items():
        if field in response:
            assert isinstance(
                response[field], expected_type
            ), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"

    # Check that the output has at least one item
    if final_chunk is True:
        assert (
            len(response["output"]) > 0
        ), "Response 'output' field should have at least one item"

    return True  # Return True if validation passes

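# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of using the validator above against a single non-streaming
# call; the model name is an assumption, not something this commit pins down.
def _example_validate_single_response():
    response = litellm.responses(
        model="openai/gpt-4o",  # illustrative model name
        input="Basic ping",
        max_output_tokens=20,
    )
    # For a completed (non-streaming) response, usage and output must be populated.
    validate_responses_api_response(response, final_chunk=True)
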
class BaseResponsesAPITest(ABC):
    """
    Abstract base test class that enforces a common suite of tests across all
    Responses API test classes. (An example provider subclass is sketched at the
    end of this file.)
    """

    @abstractmethod
    def get_base_completion_call_args(self) -> dict:
        """Must return the base completion call args"""
        pass

    def get_base_completion_reasoning_call_args(self) -> dict:
        """Must return the base completion reasoning call args"""
        return None

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_basic_openai_responses_api(self, sync_mode):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        try:
            if sync_mode:
                response = litellm.responses(
                    input="Basic ping", max_output_tokens=20,
                    **base_completion_call_args
                )
            else:
                response = await litellm.aresponses(
                    input="Basic ping", max_output_tokens=20,
                    **base_completion_call_args
                )
        except litellm.InternalServerError:
            pytest.skip("Skipping test due to litellm.InternalServerError")
        print("litellm response=", json.dumps(response, indent=4, default=str))

        # Use the helper function to validate the response
        validate_responses_api_response(response, final_chunk=True)

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    @pytest.mark.flaky(retries=3, delay=2)
    async def test_basic_openai_responses_api_streaming(self, sync_mode):
        litellm._turn_on_debug()
        base_completion_call_args = self.get_base_completion_call_args()
        collected_content_string = ""
        response_completed_event = None
        if sync_mode:
            response = litellm.responses(
                input="Basic ping",
                stream=True,
                **base_completion_call_args
            )
            for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if event.type == "response.output_text.delta":
                    collected_content_string += event.delta
                elif event.type == "response.completed":
                    response_completed_event = event
        else:
            response = await litellm.aresponses(
                input="Basic ping",
                stream=True,
                **base_completion_call_args
            )
            async for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if event.type == "response.output_text.delta":
                    collected_content_string += event.delta
                elif event.type == "response.completed":
                    response_completed_event = event

        # assert the delta chunks produced some content
        # (this content is typically rendered on chat UIs)
        assert len(collected_content_string) > 0

        # assert the response completed event is not None
        assert response_completed_event is not None

        # assert the response completed event has a response
        assert response_completed_event.response is not None

        # assert the response completed event includes the usage
        assert response_completed_event.response.usage is not None

        # basic sanity checks on the reported usage
        usage = response_completed_event.response.usage
        print("response_completed_event.response.usage=", usage)
        assert usage.input_tokens > 0 and usage.input_tokens < 100
        assert usage.output_tokens > 0 and usage.output_tokens < 2000
        assert usage.total_tokens > 0 and usage.total_tokens < 2000

        # total tokens should be the sum of input and output tokens
        assert usage.total_tokens == usage.input_tokens + usage.output_tokens

    @pytest.mark.parametrize("sync_mode", [False, True])
    @pytest.mark.asyncio
    async def test_basic_openai_responses_delete_endpoint(self, sync_mode):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        if sync_mode:
            response = litellm.responses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )

            # delete the response
            if isinstance(response, ResponsesAPIResponse):
                litellm.delete_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
            else:
                raise ValueError("response is not a ResponsesAPIResponse")
        else:
            response = await litellm.aresponses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )

            # async delete the response
            if isinstance(response, ResponsesAPIResponse):
                await litellm.adelete_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
            else:
                raise ValueError("response is not a ResponsesAPIResponse")

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.flaky(retries=3, delay=2)
    @pytest.mark.asyncio
    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode):
        # litellm._turn_on_debug()
        # litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        response_id = None
        if sync_mode:
            response = litellm.responses(
                input="Basic ping", max_output_tokens=20,
                stream=True,
                **base_completion_call_args
            )
            for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if "response" in event:
                    response_obj = event.get("response")
                    if response_obj is not None:
                        response_id = response_obj.get("id")
                        print("got response_id=", response_id)

            # delete the response
            assert response_id is not None
            litellm.delete_responses(
                response_id=response_id,
                **base_completion_call_args
            )
        else:
            response = await litellm.aresponses(
                input="Basic ping", max_output_tokens=20,
                stream=True,
                **base_completion_call_args
            )
            async for event in response:
                print("litellm response=", json.dumps(event, indent=4, default=str))
                if "response" in event:
                    response_obj = event.get("response")
                    if response_obj is not None:
                        response_id = response_obj.get("id")
                        print("got response_id=", response_id)

            # delete the response
            assert response_id is not None
            await litellm.adelete_responses(
                response_id=response_id,
                **base_completion_call_args
            )

    @pytest.mark.parametrize("sync_mode", [False, True])
    @pytest.mark.flaky(retries=3, delay=2)
    @pytest.mark.asyncio
    async def test_basic_openai_responses_get_endpoint(self, sync_mode):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        if sync_mode:
            response = litellm.responses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )

            # get the response
            if isinstance(response, ResponsesAPIResponse):
                result = litellm.get_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
                assert result is not None
                assert result.id == response.id
                assert result.output == response.output
            else:
                raise ValueError("response is not a ResponsesAPIResponse")
        else:
            response = await litellm.aresponses(
                input="Basic ping", max_output_tokens=20,
                **base_completion_call_args
            )
            # async get the response
            if isinstance(response, ResponsesAPIResponse):
                result = await litellm.aget_responses(
                    response_id=response.id,
                    **base_completion_call_args
                )
                assert result is not None
                assert result.id == response.id
                assert result.output == response.output
            else:
                raise ValueError("response is not a ResponsesAPIResponse")

    @pytest.mark.asyncio
    @pytest.mark.flaky(retries=3, delay=2)
    async def test_basic_openai_list_input_items_endpoint(self):
        """Test that calls the OpenAI List Input Items endpoint"""
        litellm._turn_on_debug()

        response = await litellm.aresponses(
            model="gpt-4o",
            input="Tell me a three sentence bedtime story about a unicorn.",
        )
        print("Initial response=", json.dumps(response, indent=4, default=str))

        response_id = response.get("id")
        assert response_id is not None, "Response should have an ID"
        print(f"Got response_id: {response_id}")

        list_items_response = await litellm.alist_input_items(
            response_id=response_id,
            limit=20,
            order="desc",
        )
        print(
            "List items response=",
            json.dumps(list_items_response, indent=4, default=str),
        )

    @pytest.mark.asyncio
    async def test_multiturn_responses_api(self):
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()
        response_1 = await litellm.aresponses(
            input="Basic ping", max_output_tokens=20, **base_completion_call_args
        )

        # follow up with a second request
        response_1_id = response_1.id
        response_2 = await litellm.aresponses(
            input="Basic ping",
            max_output_tokens=20,
            previous_response_id=response_1_id,
            **base_completion_call_args
        )

        # assert the responses are not None
        assert response_1 is not None
        assert response_2 is not None

    @pytest.mark.asyncio
    async def test_responses_api_with_tool_calls(self):
        """Test that calls the Responses API with tool calls, including a function call and its output"""
        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_call_args()

        # Define the input with a message, a function call, and the function call output
        input_data: ResponseInputParam = [
            {
                "type": "message",
                "role": "user",
                "content": "How is the weather in São Paulo today?",
            },
            {
                "type": "function_call",
                "arguments": "{\"location\": \"São Paulo, Brazil\"}",
                "call_id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
                "name": "get_weather",
                "id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
                "status": "completed",
            },
            {
                "type": "function_call_output",
                "call_id": "fc_1fe70e2a-a596-45ef-b72c-9b8567c460e5",
                "output": "Rainy",
            },
        ]

        # Define the tools
        tools = [
            {
                "type": "function",
                "name": "get_weather",
                "description": "Get current temperature for a given location.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City and country e.g. Bogotá, Colombia",
                        }
                    },
                    "required": ["location"],
                    "additionalProperties": False,
                },
            }
        ]

        try:
            # Make the responses API call
            response = await litellm.aresponses(
                input=input_data,
                store=False,
                tools=tools,
                **base_completion_call_args
            )
        except litellm.InternalServerError:
            pytest.skip("Skipping test due to litellm.InternalServerError")

        print("litellm response=", json.dumps(response, indent=4, default=str))

        # Validate the response structure
        validate_responses_api_response(response, final_chunk=True)

        # Additional assertions specific to tool calls
        assert response is not None
        assert "output" in response
        assert len(response["output"]) > 0

    @pytest.mark.asyncio
    async def test_responses_api_multi_turn_with_reasoning_and_structured_output(self):
        """
        Test a multi-turn conversation with reasoning, structured output, and tool calls.

        This test validates:
        - First call: the model uses reasoning to process a question and makes a tool call
        - Tool call handling: the function call output is properly processed
        - Second call: the model produces structured output incorporating the tool results
        - Structured output: the response conforms to the defined Pydantic model schema
        """
        from pydantic import BaseModel

        litellm._turn_on_debug()
        litellm.set_verbose = True
        base_completion_call_args = self.get_base_completion_reasoning_call_args()
        if base_completion_call_args is None:
            pytest.skip("Skipping test because no base completion reasoning call args are defined")

        # Define tools for the conversation
        tools = [{"type": "function", "name": "get_today"}]

        # Define the structured output schema
        class Output(BaseModel):
            today: str
            number_of_r: str

        # Initial conversation input
        input_messages = [
            {
                "role": "user",
                "content": "How many r in strrawberrry? While you're thinking, you should call tool get_today. Then you output the today and number of r",
            }
        ]

        # First call - should trigger reasoning and a tool call
        response = await litellm.aresponses(
            input=input_messages,
            tools=tools,
            reasoning={"effort": "low", "summary": "detailed"},
            text_format=Output,
            **base_completion_call_args
        )

        print("First call output:")
        print(json.dumps(response.output, indent=4, default=str))

        # Validate the first response structure
        validate_responses_api_response(response, final_chunk=True)
        assert response.output is not None
        assert len(response.output) > 0

        # Extend the input with the first response's output
        input_messages.extend(response.output)

        # Process any tool calls and add function outputs
        function_outputs = []
        for item in response.output:
            if hasattr(item, "type") and item.type in ["function_call", "custom_tool_call"]:
                if hasattr(item, "name") and item.name == "get_today":
                    function_outputs.append(
                        {
                            "type": "function_call_output",
                            "call_id": item.call_id,
                            "output": "2025-01-15",
                        }
                    )

        # Add the function outputs to the conversation
        input_messages.extend(function_outputs)

        print("Second call input:")
        print(json.dumps(input_messages, indent=4, default=str))

        # Second call - should produce structured output
        final_response = await litellm.aresponses(
            input=input_messages,
            tools=tools,
            reasoning={"effort": "low", "summary": "detailed"},
            text_format=Output,
            **base_completion_call_args
        )

        print("Second call output:")
        print(json.dumps(final_response.output, indent=4, default=str))

        # Validate the final response structure
        validate_responses_api_response(final_response, final_chunk=True)
        assert final_response.output is not None
        assert len(final_response.output) > 0
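

# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of how a provider test module is expected to plug into
# BaseResponsesAPITest: subclass it and return provider-specific call args. The
# model names below are assumptions for illustration; the real subclasses in this
# commit (Anthropic, Azure, Google AI Studio) follow the same pattern.
class ExampleOpenAIResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self) -> dict:
        # Every inherited test will splat these kwargs into litellm.responses()/aresponses().
        return {"model": "openai/gpt-4o"}  # illustrative model name

    def get_base_completion_reasoning_call_args(self) -> dict:
        # Optional: return reasoning-capable call args to enable the
        # multi-turn reasoning / structured-output test; returning None skips it.
        return {"model": "openai/o3-mini"}  # illustrative reasoning-capable model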
@@ -0,0 +1,63 @@
# conftest.py

import importlib
import os
import sys

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm


@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
    """
    This fixture reloads litellm before every test function, to speed up testing
    by preventing callbacks from being chained across tests.
    """
    curr_dir = os.getcwd()  # Get the current working directory
    sys.path.insert(
        0, os.path.abspath("../..")
    )  # Adds the project directory to the system path

    import litellm
    from litellm import Router

    importlib.reload(litellm)

    try:
        if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
            import litellm.proxy.proxy_server

            importlib.reload(litellm.proxy.proxy_server)
    except Exception as e:
        print(f"Error reloading litellm.proxy.proxy_server: {e}")

    import asyncio

    loop = asyncio.get_event_loop_policy().new_event_loop()
    asyncio.set_event_loop(loop)
    print(litellm)
    # from litellm import Router, completion, aembedding, acompletion, embedding
    yield

    # Teardown code (executes after the yield point)
    loop.close()  # Close the loop created earlier
    asyncio.set_event_loop(None)  # Remove the reference to the loop


def pytest_collection_modifyitems(config, items):
    # Separate tests in 'test_amazing_proxy_custom_logger.py' from the other tests
    custom_logger_tests = [
        item for item in items if "custom_logger" in item.parent.name
    ]
    other_tests = [item for item in items if "custom_logger" not in item.parent.name]

    # Sort tests based on their names
    custom_logger_tests.sort(key=lambda x: x.name)
    other_tests.sort(key=lambda x: x.name)

    # Reorder the items list so the custom logger tests run first
    items[:] = custom_logger_tests + other_tests
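

# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of the problem the autouse fixture above guards against:
# callbacks registered on the litellm module are module-level state, so without
# the importlib.reload() they accumulate ("chain") across test functions.
def _example_callback_accumulation():
    from litellm.integrations.custom_logger import CustomLogger

    class _NoopLogger(CustomLogger):
        pass

    # Module-level list shared by all tests in the same process.
    litellm.callbacks.append(_NoopLogger())
    print("registered callbacks:", len(litellm.callbacks))
    # importlib.reload(litellm) in setup_and_teardown resets this state before the next test.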
@@ -0,0 +1,139 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock
from litellm.responses.litellm_completion_transformation.handler import LiteLLMCompletionTransformationHandler
from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig
from litellm.types.utils import ModelResponse


sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest
from openai.types.responses.function_tool import FunctionTool


class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        # litellm._turn_on_debug()
        return {
            "model": "anthropic/claude-3-5-sonnet-latest",
        }

    async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_get_endpoint(self, sync_mode=False):
        pass


def test_multiturn_tool_calls():
    # Test a multi-turn, tool-calling Responses API flow for Anthropic
    litellm._turn_on_debug()
    shell_tool = dict(
        FunctionTool(
            type="function",
            name="shell",
            description="Runs a shell command, and returns its output.",
            parameters={
                "type": "object",
                "properties": {
                    "command": {"type": "array", "items": {"type": "string"}},
                    "workdir": {
                        "type": "string",
                        "description": "The working directory for the command.",
                    },
                },
                "required": ["command"],
            },
            strict=True,
        )
    )

    # Step 1: Initial request with the tool
    response = litellm.responses(
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "make a hello world html file"}
                ],
                "type": "message",
            }
        ],
        model="anthropic/claude-3-7-sonnet-latest",
        instructions="You are a helpful coding assistant.",
        tools=[shell_tool],
    )

    print("response=", response)

    # Step 2: Send the results of the tool call back to the model.
    # Get the response ID and tool call ID from the response.
    response_id = response.id
    tool_call_id = ""
    for item in response.output:
        if "type" in item and item["type"] == "function_call":
            tool_call_id = item["call_id"]
            break

    follow_up_response = litellm.responses(
        model="anthropic/claude-3-7-sonnet-latest",
        previous_response_id=response_id,
        input=[
            {
                "type": "function_call_output",
                "call_id": tool_call_id,
                "output": '{"output":"<html>\\n<head>\\n <title>Hello Page</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n <p>Welcome to this simple webpage!</p>\\n</body>\\n</html> > index.html\\n","metadata":{"exit_code":0,"duration_seconds":0}}',
            }
        ],
        tools=[shell_tool],
    )

    print("follow_up_response=", follow_up_response)


@pytest.mark.asyncio
async def test_async_response_api_handler_merges_trace_id_without_error():
    handler = LiteLLMCompletionTransformationHandler()

    async def fake_session_handler(previous_response_id, litellm_completion_request):
        litellm_completion_request["litellm_trace_id"] = "session-trace"
        return litellm_completion_request

    with patch.object(
        LiteLLMCompletionResponsesConfig,
        "async_responses_api_session_handler",
        side_effect=fake_session_handler,
    ):
        with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion:
            mock_acompletion.return_value = ModelResponse(
                id="id", created=0, model="test", object="chat.completion", choices=[]
            )
            await handler.async_response_api_handler(
                litellm_completion_request={"model": "test"},
                request_input="hi",
                responses_api_request={"previous_response_id": "123"},
                litellm_trace_id="original-trace",
            )
            # ensure acompletion was called once with the merged trace_id
            assert mock_acompletion.call_count == 1
            assert (
                mock_acompletion.call_args.kwargs["litellm_trace_id"] == "session-trace"
            )
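

# --- Illustrative example (not part of the original commit) -------------------
# A minimal sketch of the generic loop that test_multiturn_tool_calls performs by
# hand: call the Responses API, execute any returned function_call items, and send
# the outputs back via previous_response_id. `run_command` is a hypothetical
# stand-in for a real tool implementation, not part of this commit.
def _example_tool_call_round_trip(model: str, user_text: str, tools: list, run_command):
    response = litellm.responses(input=user_text, model=model, tools=tools)
    tool_outputs = []
    for item in response.output:
        if "type" in item and item["type"] == "function_call":
            # Execute the tool and echo its result back to the model.
            tool_outputs.append(
                {
                    "type": "function_call_output",
                    "call_id": item["call_id"],
                    "output": run_command(item["arguments"]),
                }
            )
    if not tool_outputs:
        return response
    return litellm.responses(
        model=model,
        previous_response_id=response.id,
        input=tool_outputs,
        tools=tools,
    )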
@@ -0,0 +1,47 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
from unittest.mock import patch, AsyncMock

sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponseTextConfig,
    ResponseAPIUsage,
    IncompleteDetails,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from base_responses_api import BaseResponsesAPITest


class TestAzureResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        return {
            "model": "azure/computer-use-preview",
            "truncation": "auto",
            "api_base": os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
            "api_key": os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
            "api_version": os.getenv("AZURE_RESPONSES_OPENAI_API_VERSION"),
        }


@pytest.mark.asyncio
async def test_azure_responses_api_preview_api_version():
    """
    Ensure the new Azure "preview" api_version works.
    """
    litellm._turn_on_debug()
    response = await litellm.aresponses(
        model="azure/computer-use-preview",
        truncation="auto",
        api_version="preview",
        api_base=os.getenv("AZURE_RESPONSES_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_RESPONSES_OPENAI_API_KEY"),
        input="Hello, can you tell me a short joke?",
    )
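

# --- Illustrative example (not part of the original commit) -------------------
# A minimal pre-flight sketch for the Azure tests above, which assume these
# environment variables are set; the helper name is illustrative.
AZURE_RESPONSES_ENV_VARS = [
    "AZURE_RESPONSES_OPENAI_ENDPOINT",
    "AZURE_RESPONSES_OPENAI_API_KEY",
    "AZURE_RESPONSES_OPENAI_API_VERSION",
]


def _require_azure_responses_env():
    missing = [name for name in AZURE_RESPONSES_ENV_VARS if not os.getenv(name)]
    if missing:
        pytest.skip(f"Missing Azure Responses API env vars: {missing}")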
@@ -0,0 +1,239 @@
"""
Unit tests for BaseResponsesAPIStreamingIterator

Tests core functionality including:
1. Processing chunks and handling ResponseCompletedEvent
2. Ensuring _update_responses_api_response_id_with_model_id is called for the final chunk
3. Verifying the ID update is NOT called for non-final chunks (delta events)
4. Edge case handling for invalid JSON, empty chunks, and [DONE] markers

These tests ensure the streaming iterator correctly processes response chunks
and applies model ID updates only to completed responses, as required for proper
response tracking and logging.
"""

import json
import os
import sys
from datetime import datetime
from typing import Any, Dict, Optional
from unittest.mock import Mock, patch

import pytest

sys.path.insert(0, os.path.abspath("../.."))

from litellm.constants import STREAM_SSE_DONE_STRING
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
    ResponseCompletedEvent,
    ResponsesAPIResponse,
    ResponsesAPIStreamEvents,
    OutputTextDeltaEvent,
)


class TestBaseResponsesAPIStreamingIterator:
    """Test cases for BaseResponsesAPIStreamingIterator"""

    def test_process_chunk_with_response_completed_event(self):
        """
        Test that _process_chunk correctly processes a ResponseCompletedEvent
        and calls _update_responses_api_response_id_with_model_id for the final chunk.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create a mock ResponsesAPIResponse for the completed event
        mock_responses_api_response = Mock(spec=ResponsesAPIResponse)
        mock_responses_api_response.id = "original_response_id"

        # Create a mock ResponseCompletedEvent
        mock_completed_event = Mock(spec=ResponseCompletedEvent)
        mock_completed_event.type = ResponsesAPIStreamEvents.RESPONSE_COMPLETED
        mock_completed_event.response = mock_responses_api_response

        # Set up the mock transform method to return our completed event
        mock_config.transform_streaming_response.return_value = mock_completed_event

        # Mock the _update_responses_api_response_id_with_model_id method
        updated_response = Mock(spec=ResponsesAPIResponse)
        updated_response.id = "updated_response_id"

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
            litellm_metadata={"model_info": {"id": "model_123"}},
            custom_llm_provider="openai",
        )

        # Prepare test chunk data
        test_chunk_data = {
            "type": "response.completed",
            "response": {
                "id": "original_response_id",
                "output": [{"type": "message", "content": [{"text": "Hello World"}]}],
            },
        }

        with patch.object(
            ResponsesAPIRequestUtils,
            "_update_responses_api_response_id_with_model_id",
            return_value=updated_response,
        ) as mock_update_id:
            # Process the chunk
            result = iterator._process_chunk(json.dumps(test_chunk_data))

            # Assertions
            assert result is not None
            assert result.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED

            # Verify that _update_responses_api_response_id_with_model_id was called
            mock_update_id.assert_called_once_with(
                responses_api_response=mock_responses_api_response,
                litellm_metadata={"model_info": {"id": "model_123"}},
                custom_llm_provider="openai",
            )

            # Verify the completed response was stored
            assert iterator.completed_response == result

            # Verify the response was updated on the event
            assert result.response == updated_response

    def test_process_chunk_with_delta_event_no_id_update(self):
        """
        Test that _process_chunk correctly processes a delta event
        and does NOT call _update_responses_api_response_id_with_model_id.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create a mock OutputTextDeltaEvent (not a completed event)
        mock_delta_event = Mock(spec=OutputTextDeltaEvent)
        mock_delta_event.type = ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
        mock_delta_event.delta = "Hello"
        # Delta events don't have a response attribute
        if hasattr(mock_delta_event, "response"):
            delattr(mock_delta_event, "response")

        # Set up the mock transform method to return our delta event
        mock_config.transform_streaming_response.return_value = mock_delta_event

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
            litellm_metadata={"model_info": {"id": "model_123"}},
            custom_llm_provider="openai",
        )

        # Prepare test chunk data for a delta event
        test_chunk_data = {
            "type": "response.output_text.delta",
            "delta": "Hello",
            "item_id": "item_123",
            "output_index": 0,
            "content_index": 0,
        }

        with patch.object(
            ResponsesAPIRequestUtils,
            "_update_responses_api_response_id_with_model_id",
        ) as mock_update_id:
            # Process the chunk
            result = iterator._process_chunk(json.dumps(test_chunk_data))

            # Assertions
            assert result is not None
            assert result.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA

            # Verify that _update_responses_api_response_id_with_model_id was NOT called
            mock_update_id.assert_not_called()

            # Verify no completed response was stored (since this is not a completed event)
            assert iterator.completed_response is None

    def test_process_chunk_handles_invalid_json(self):
        """
        Test that _process_chunk gracefully handles invalid JSON.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
        )

        # Test with invalid JSON
        result = iterator._process_chunk("invalid json {")

        # Should return None for invalid JSON
        assert result is None
        assert iterator.completed_response is None

    def test_process_chunk_handles_done_marker(self):
        """
        Test that _process_chunk correctly handles the [DONE] marker.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
        )

        # Test with the [DONE] marker
        result = iterator._process_chunk(STREAM_SSE_DONE_STRING)

        # Should return None and set the finished flag
        assert result is None
        assert iterator.finished is True

    def test_process_chunk_handles_empty_chunk(self):
        """
        Test that _process_chunk correctly handles empty or None chunks.
        """
        # Mock dependencies
        mock_response = Mock()
        mock_logging_obj = Mock(spec=LiteLLMLoggingObj)
        mock_config = Mock(spec=BaseResponsesAPIConfig)

        # Create the iterator instance
        iterator = BaseResponsesAPIStreamingIterator(
            response=mock_response,
            model="gpt-4",
            responses_api_provider_config=mock_config,
            logging_obj=mock_logging_obj,
        )

        # Test with an empty chunk
        result = iterator._process_chunk("")
        assert result is None

        # Test with a None chunk
        result = iterator._process_chunk(None)
        assert result is None
@@ -0,0 +1,106 @@
import os
import sys
import pytest
from unittest.mock import patch, AsyncMock

sys.path.insert(0, os.path.abspath("../.."))
import litellm
import json
from base_responses_api import BaseResponsesAPITest


@pytest.mark.asyncio
async def test_basic_google_ai_studio_responses_api_with_tools():
    litellm._turn_on_debug()
    litellm.set_verbose = True
    request_model = "gemini/gemini-2.5-flash"
    response = await litellm.aresponses(
        model=request_model,
        input="what is the latest version of supabase python package and when was it released?",
        tools=[
            {
                "type": "web_search_preview",
                "search_context_size": "low",
            }
        ],
    )
    print("litellm response=", json.dumps(response, indent=4, default=str))


@pytest.mark.asyncio
async def test_mock_basic_google_ai_studio_responses_api_with_tools():
    """
    Ensure that this is the request litellm.acompletion receives when we pass web search options:

    litellm.acompletion(messages=[{'role': 'user', 'content': 'what is the latest version of supabase python package and when was it released?'}], model='gemini-2.5-flash', tools=[], web_search_options={'search_context_size': 'low', 'user_location': None})
    """
    # Mock the acompletion function
    litellm._turn_on_debug()
    mock_response = litellm.ModelResponse(
        id="test-id",
        created=1234567890,
        model="gemini/gemini-2.5-flash",
        object="chat.completion",
        choices=[
            litellm.utils.Choices(
                index=0,
                message=litellm.utils.Message(
                    role="assistant",
                    content="Test response",
                ),
                finish_reason="stop",
            )
        ],
    )

    with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion:
        mock_acompletion.return_value = mock_response

        request_model = "gemini/gemini-2.5-flash"
        await litellm.aresponses(
            model=request_model,
            input="what is the latest version of supabase python package and when was it released?",
            tools=[
                {
                    "type": "web_search_preview",
                    "search_context_size": "low",
                }
            ],
        )

        # Verify that acompletion was called
        assert mock_acompletion.called

        # Get the call arguments
        call_args, call_kwargs = mock_acompletion.call_args

        # Verify the expected parameters were passed
        print("call kwargs to litellm.completion=", json.dumps(call_kwargs, indent=4, default=str))
        assert "web_search_options" in call_kwargs
        assert call_kwargs["web_search_options"] is not None
        assert call_kwargs["web_search_options"]["search_context_size"] == "low"
        assert call_kwargs["web_search_options"]["user_location"] is None

        # Verify other expected parameters
        assert call_kwargs["model"] == "gemini-2.5-flash"
        assert len(call_kwargs["messages"]) == 1
        assert call_kwargs["messages"][0]["role"] == "user"
        assert call_kwargs["messages"][0]["content"] == "what is the latest version of supabase python package and when was it released?"
        assert call_kwargs["tools"] == []  # web search tools are converted to web_search_options, not kept as tools


class TestGoogleAIStudioResponsesAPITest(BaseResponsesAPITest):
    def get_base_completion_call_args(self):
        # litellm._turn_on_debug()
        return {
            "model": "gemini/gemini-2.5-flash-lite",
        }

    async def test_basic_openai_responses_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_streaming_delete_endpoint(self, sync_mode=False):
        pass

    async def test_basic_openai_responses_get_endpoint(self, sync_mode=False):
        pass
File diff suppressed because it is too large