Added LiteLLM to the stack
510	Development/litellm/tests/llm_translation/test_cohere.py	Normal file
@@ -0,0 +1,510 @@
import os
import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
import io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import json

import pytest

import litellm
from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
from unittest.mock import AsyncMock, patch
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

litellm.num_retries = 3


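# Citation support: when `documents` are passed to a cohere_chat model, the
# response is expected to carry a `citations` field (as a key on streamed
# chunks, or as an attribute on the final response object).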
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.asyncio
async def test_chat_completion_cohere_citations(stream):
    try:
        litellm.set_verbose = True
        messages = [
            {
                "role": "user",
                "content": "Which penguins are the tallest?",
            },
        ]
        response = await litellm.acompletion(
            model="cohere_chat/command-r",
            messages=messages,
            documents=[
                {"title": "Tall penguins", "text": "Emperor penguins are the tallest."},
                {
                    "title": "Penguin habitats",
                    "text": "Emperor penguins only live in Antarctica.",
                },
            ],
            stream=stream,
        )

        if stream:
            citations_chunk = False
            async for chunk in response:
                print("received chunk", chunk)
                if "citations" in chunk:
                    citations_chunk = True
                    break
            assert citations_chunk
        else:
            assert response.citations is not None
    except litellm.ServiceUnavailableError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


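# Tool-calling round trip: the first call should return a tool_call, the tool
# result is appended back as a `role: "tool"` message, and a second call lets
# the model answer from it (`force_single_step` is forwarded to Cohere's API).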
def test_completion_cohere_command_r_plus_function_call():
    litellm.set_verbose = True
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    try:
        # test without max tokens
        response = completion(
            model="command-r-plus",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        # Add any assertions here to check response args
        print(response)
        assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
        assert isinstance(
            response.choices[0].message.tool_calls[0].function.arguments, str
        )

        messages.append(
            response.choices[0].message.model_dump()
        )  # Add assistant tool invokes
        tool_result = (
            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
        )
        # Add user submitted tool results in the OpenAI format
        messages.append(
            {
                "tool_call_id": response.choices[0].message.tool_calls[0].id,
                "role": "tool",
                "name": response.choices[0].message.tool_calls[0].function.name,
                "content": tool_result,
            }
        )
        # In the second response, Cohere should deduce answer from tool results
        second_response = completion(
            model="command-r-plus",
            messages=messages,
            tools=tools,
            tool_choice="auto",
            force_single_step=True,
        )
        print(second_response)
    except litellm.Timeout:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


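# Basic completion against `command-r`, including assistant messages whose
# content is a list of typed text blocks rather than a plain string.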
# @pytest.mark.skip(reason="flaky test, times out frequently")
@pytest.mark.flaky(retries=6, delay=1)
def test_completion_cohere():
    try:
        # litellm.set_verbose=True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "assistant", "content": [{"text": "2", "type": "text"}]},
            {"role": "assistant", "content": [{"text": "3", "type": "text"}]},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        response = completion(
            model="command-r",
            messages=messages,
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# FYI - cohere_chat looks quite unstable, even when testing locally
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.flaky(retries=3, delay=1)
async def test_chat_completion_cohere(sync_mode):
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


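# Streaming variant of the chat test; only the async path is exercised here
# (sync_mode is parametrized over [False] alone).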
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [False])
async def test_chat_completion_cohere_stream(sync_mode):
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print("async cohere stream response", response)
            async for chunk in response:
                print(chunk)
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print(response)
            for chunk in response:
                print(chunk)
    except litellm.APIConnectionError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


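# `allowed_openai_params` forwards OpenAI-style params that the provider
# config would otherwise drop; the flag itself must not leak into the
# provider request body (asserted below).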
@pytest.mark.asyncio
async def test_cohere_request_body_with_allowed_params():
    """
    Test to validate that when allowed_openai_params is provided, the request body contains
    the correct response_format and reasoning_effort values.
    """
    # Define test parameters
    test_response_format = {"type": "json"}
    test_reasoning_effort = "low"
    test_tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_time",
                "description": "Get the current time in a given location.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city name, e.g. San Francisco",
                        }
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    client = AsyncHTTPHandler()

    # Mock the post method
    with patch.object(client, "post", new=AsyncMock()) as mock_post:
        try:
            await litellm.acompletion(
                model="cohere/command",
                messages=[{"content": "what llm are you", "role": "user"}],
                allowed_openai_params=["tools", "response_format", "reasoning_effort"],
                response_format=test_response_format,
                reasoning_effort=test_reasoning_effort,
                tools=test_tools,
                client=client,
            )
        except Exception:
            pass  # We only care about the request body validation

        # Verify the API call was made
        mock_post.assert_called_once()

        # Get and parse the request body
        request_data = json.loads(mock_post.call_args.kwargs["data"])
        print(f"request_data: {request_data}")

        # Validate request contains our specified parameters
        assert "allowed_openai_params" not in request_data
        assert request_data["response_format"] == test_response_format
        assert request_data["reasoning_effort"] == test_reasoning_effort


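# embed-v4.0 supports a configurable output size via the OpenAI-style
# `dimensions` param, so the returned vector length should match the request.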
def test_cohere_embedding_output_dimensions():
    litellm._turn_on_debug()
    response = embedding(
        model="cohere/embed-v4.0", input="Hello, world!", dimensions=512
    )
    print(f"response: {response}\n")
    assert len(response.data[0]["embedding"]) == 512


# Comprehensive Cohere Embed v4 tests
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_basic_text(sync_mode):
    """Test basic text embedding functionality with Cohere Embed v4."""
    try:
        data = {
            "model": "cohere/embed-v4.0",
            "input": ["Hello world!", "This is a test sentence."],
            "input_type": "search_document",
        }

        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate response structure
        assert response.model is not None
        assert len(response.data) == 2
        assert response.data[0]['object'] == 'embedding'
        assert len(response.data[0]['embedding']) > 0
        assert response.usage.prompt_tokens > 0
        assert isinstance(response.usage, litellm.Usage)

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_with_dimensions(sync_mode):
|
||||
"""Test Cohere Embed v4 with specific dimension parameter."""
|
||||
try:
|
||||
data = {
|
||||
"model": "cohere/embed-v4.0",
|
||||
"input": ["Test with custom dimensions"],
|
||||
"dimensions": 512,
|
||||
"input_type": "search_query"
|
||||
}
|
||||
|
||||
if sync_mode:
|
||||
response = embedding(**data)
|
||||
else:
|
||||
response = await litellm.aembedding(**data)
|
||||
|
||||
# Validate dimension
|
||||
assert len(response.data[0]['embedding']) == 512
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
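# Multimodal embedding: embed-v4.0 also accepts base64-encoded images when
# input_type="image".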
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_image_embedding(sync_mode):
|
||||
"""Test Cohere Embed v4 image embedding functionality (multimodal)."""
|
||||
try:
|
||||
import base64
|
||||
|
||||
# 1x1 pixel red PNG (base64 encoded)
|
||||
test_image_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\x0cIDATx\x9cc\xf8\x00\x00\x00\x01\x00\x01\x00\x00\x00\x00'
|
||||
test_image_b64 = base64.b64encode(test_image_data).decode('utf-8')
|
||||
|
||||
data = {
|
||||
"model": "cohere/embed-v4.0",
|
||||
"input": [test_image_b64],
|
||||
"input_type": "image"
|
||||
}
|
||||
|
||||
if sync_mode:
|
||||
response = embedding(**data)
|
||||
else:
|
||||
response = await litellm.aembedding(**data)
|
||||
|
||||
# Validate response structure for image embedding
|
||||
assert response.model is not None
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]['object'] == 'embedding'
|
||||
assert len(response.data[0]['embedding']) > 0
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
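# Cohere distinguishes embeddings by purpose: documents vs. queries vs.
# classification vs. clustering, selected via `input_type`.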
@pytest.mark.parametrize("input_type", ["search_document", "search_query", "classification", "clustering"])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_input_types(input_type):
|
||||
"""Test Cohere Embed v4 with different input types."""
|
||||
try:
|
||||
response = await litellm.aembedding(
|
||||
model="cohere/embed-v4.0",
|
||||
input=[f"Test text for {input_type}"],
|
||||
input_type=input_type
|
||||
)
|
||||
|
||||
assert response.model is not None
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]['object'] == 'embedding'
|
||||
assert len(response.data[0]['embedding']) > 0
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
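# encoding_format="float" should produce plain float vectors (checked
# element-wise below).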
def test_cohere_embed_v4_encoding_format():
    """Test Cohere Embed v4 with different encoding formats."""
    try:
        response = embedding(
            model="cohere/embed-v4.0",
            input=["Test encoding format"],
            encoding_format="float",
        )

        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]['object'] == 'embedding'
        assert len(response.data[0]['embedding']) > 0
        # Validate that embeddings are floats
        assert all(isinstance(x, float) for x in response.data[0]['embedding'])
        assert isinstance(response.usage, litellm.Usage)

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


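# Invalid inputs (an empty list, None) are expected to raise rather than
# return an empty response.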
def test_cohere_embed_v4_error_handling():
    """Test error handling for Cohere Embed v4 with invalid inputs."""
    try:
        # Test with empty input - should raise an error
        try:
            response = embedding(
                model="cohere/embed-v4.0",
                input=[],  # Empty input
            )
            pytest.fail("Should have failed with empty input")
        except Exception:
            pass  # Expected to fail

        # Test with None input - should raise an error
        try:
            response = embedding(
                model="cohere/embed-v4.0",
                input=None,
            )
            pytest.fail("Should have failed with None input")
        except Exception:
            pass  # Expected to fail

    except Exception as e:
        pytest.fail(f"Error in error handling test: {e}")


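# Batch embedding: each input should come back as its own item with a
# matching `index`, preserving request order.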
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_multiple_texts(sync_mode):
|
||||
"""Test Cohere Embed v4 with multiple text inputs."""
|
||||
try:
|
||||
texts = [
|
||||
"The quick brown fox jumps over the lazy dog",
|
||||
"Machine learning is transforming the world",
|
||||
"Python is a versatile programming language",
|
||||
"Natural language processing enables human-computer interaction"
|
||||
]
|
||||
|
||||
data = {
|
||||
"model": "cohere/embed-v4.0",
|
||||
"input": texts,
|
||||
"input_type": "search_document"
|
||||
}
|
||||
|
||||
if sync_mode:
|
||||
response = embedding(**data)
|
||||
else:
|
||||
response = await litellm.aembedding(**data)
|
||||
|
||||
# Validate response structure
|
||||
assert response.model is not None
|
||||
assert len(response.data) == len(texts)
|
||||
|
||||
for i, data_item in enumerate(response.data):
|
||||
assert data_item['object'] == 'embedding'
|
||||
assert data_item['index'] == i
|
||||
assert len(data_item['embedding']) > 0
|
||||
assert all(isinstance(x, float) for x in data_item['embedding'])
|
||||
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
assert response.usage.prompt_tokens > 0
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
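# Combining `input_type`, `dimensions`, and `encoding_format` in a single
# request; the custom dimension should still be honored.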
def test_cohere_embed_v4_with_optional_params():
    """Test Cohere Embed v4 with various optional parameters."""
    try:
        response = embedding(
            model="cohere/embed-v4.0",
            input=["Test with optional parameters"],
            input_type="search_query",
            dimensions=256,
            encoding_format="float",
        )

        # Validate response
        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]['object'] == 'embedding'
        assert len(response.data[0]['embedding']) == 256  # Custom dimensions
        assert all(isinstance(x, float) for x in response.data[0]['embedding'])
        assert isinstance(response.usage, litellm.Usage)

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")