Added LiteLLM to the stack
510	Development/litellm/tests/llm_translation/test_cohere.py	Normal file
@@ -0,0 +1,510 @@
import os
import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
import io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import json

import pytest

import litellm
from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
from unittest.mock import AsyncMock, patch
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

litellm.num_retries = 3


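# Citation support: when `documents` are passed to a cohere_chat model, the
# response is expected to carry a `citations` field (as a key on streamed
# chunks, or as an attribute on the final response object).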
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.asyncio
async def test_chat_completion_cohere_citations(stream):
    try:
        litellm.set_verbose = True
        messages = [
            {
                "role": "user",
                "content": "Which penguins are the tallest?",
            },
        ]
        response = await litellm.acompletion(
            model="cohere_chat/command-r",
            messages=messages,
            documents=[
                {"title": "Tall penguins", "text": "Emperor penguins are the tallest."},
                {
                    "title": "Penguin habitats",
                    "text": "Emperor penguins only live in Antarctica.",
                },
            ],
            stream=stream,
        )

        if stream:
            citations_chunk = False
            async for chunk in response:
                print("received chunk", chunk)
                if "citations" in chunk:
                    citations_chunk = True
                    break
            assert citations_chunk
        else:
            assert response.citations is not None
    except litellm.ServiceUnavailableError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


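# Tool-calling round trip: the first call should return a tool_call, the tool
# result is appended back as a `role: "tool"` message, and a second call lets
# the model answer from it (`force_single_step` is forwarded to Cohere's API).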
def test_completion_cohere_command_r_plus_function_call():
    litellm.set_verbose = True
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    try:
        # test without max tokens
        response = completion(
            model="command-r-plus",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )
        # Add any assertions here to check response args
        print(response)
        assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
        assert isinstance(
            response.choices[0].message.tool_calls[0].function.arguments, str
        )

        messages.append(
            response.choices[0].message.model_dump()
        )  # Add assistant tool invokes
        tool_result = (
            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
        )
        # Add user submitted tool results in the OpenAI format
        messages.append(
            {
                "tool_call_id": response.choices[0].message.tool_calls[0].id,
                "role": "tool",
                "name": response.choices[0].message.tool_calls[0].function.name,
                "content": tool_result,
            }
        )
        # In the second response, Cohere should deduce answer from tool results
        second_response = completion(
            model="command-r-plus",
            messages=messages,
            tools=tools,
            tool_choice="auto",
            force_single_step=True,
        )
        print(second_response)
    except litellm.Timeout:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


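# Basic completion against `command-r`, including assistant messages whose
# content is a list of typed text blocks rather than a plain string.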
# @pytest.mark.skip(reason="flaky test, times out frequently")
@pytest.mark.flaky(retries=6, delay=1)
def test_completion_cohere():
    try:
        # litellm.set_verbose=True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {"role": "assistant", "content": [{"text": "2", "type": "text"}]},
            {"role": "assistant", "content": [{"text": "3", "type": "text"}]},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        response = completion(
            model="command-r",
            messages=messages,
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# FYI - cohere_chat looks quite unstable, even when testing locally
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.flaky(retries=3, delay=1)
async def test_chat_completion_cohere(sync_mode):
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


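# Streaming variant of the chat test; only the async path is exercised here
# (sync_mode is parametrized over [False] alone).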
@pytest.mark.asyncio
@pytest.mark.parametrize("sync_mode", [False])
async def test_chat_completion_cohere_stream(sync_mode):
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print("async cohere stream response", response)
            async for chunk in response:
                print(chunk)
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print(response)
            for chunk in response:
                print(chunk)
    except litellm.APIConnectionError:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


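# `allowed_openai_params` forwards OpenAI-style params that the provider
# config would otherwise drop; the flag itself must not leak into the
# provider request body (asserted below).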
@pytest.mark.asyncio
async def test_cohere_request_body_with_allowed_params():
    """
    Test to validate that when allowed_openai_params is provided, the request body contains
    the correct response_format and reasoning_effort values.
    """
    # Define test parameters
    test_response_format = {"type": "json"}
    test_reasoning_effort = "low"
    test_tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_time",
                "description": "Get the current time in a given location.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city name, e.g. San Francisco",
                        }
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    client = AsyncHTTPHandler()

    # Mock the post method
    with patch.object(client, "post", new=AsyncMock()) as mock_post:
        try:
            await litellm.acompletion(
                model="cohere/command",
                messages=[{"content": "what llm are you", "role": "user"}],
                allowed_openai_params=["tools", "response_format", "reasoning_effort"],
                response_format=test_response_format,
                reasoning_effort=test_reasoning_effort,
                tools=test_tools,
                client=client,
            )
        except Exception:
            pass  # We only care about the request body validation

        # Verify the API call was made
        mock_post.assert_called_once()

        # Get and parse the request body
        request_data = json.loads(mock_post.call_args.kwargs["data"])
        print(f"request_data: {request_data}")

        # Validate request contains our specified parameters
        assert "allowed_openai_params" not in request_data
        assert request_data["response_format"] == test_response_format
        assert request_data["reasoning_effort"] == test_reasoning_effort


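# embed-v4.0 supports a configurable output size via the OpenAI-style
# `dimensions` param, so the returned vector length should match the request.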
def test_cohere_embedding_output_dimensions():
    litellm._turn_on_debug()
    response = embedding(
        model="cohere/embed-v4.0", input="Hello, world!", dimensions=512
    )
    print(f"response: {response}\n")
    assert len(response.data[0]["embedding"]) == 512


# Comprehensive Cohere Embed v4 tests
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_basic_text(sync_mode):
    """Test basic text embedding functionality with Cohere Embed v4."""
    try:
        data = {
            "model": "cohere/embed-v4.0",
            "input": ["Hello world!", "This is a test sentence."],
            "input_type": "search_document",
        }

        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate response structure
        assert response.model is not None
        assert len(response.data) == 2
        assert response.data[0]['object'] == 'embedding'
        assert len(response.data[0]['embedding']) > 0
        assert response.usage.prompt_tokens > 0
        assert isinstance(response.usage, litellm.Usage)

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_with_dimensions(sync_mode):
|
||||
"""Test Cohere Embed v4 with specific dimension parameter."""
|
||||
try:
|
||||
data = {
|
||||
"model": "cohere/embed-v4.0",
|
||||
"input": ["Test with custom dimensions"],
|
||||
"dimensions": 512,
|
||||
"input_type": "search_query"
|
||||
}
|
||||
|
||||
if sync_mode:
|
||||
response = embedding(**data)
|
||||
else:
|
||||
response = await litellm.aembedding(**data)
|
||||
|
||||
# Validate dimension
|
||||
assert len(response.data[0]['embedding']) == 512
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
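# Multimodal embedding: embed-v4.0 also accepts base64-encoded images when
# input_type="image".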
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_image_embedding(sync_mode):
|
||||
"""Test Cohere Embed v4 image embedding functionality (multimodal)."""
|
||||
try:
|
||||
import base64
|
||||
|
||||
# 1x1 pixel red PNG (base64 encoded)
|
||||
test_image_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\x0cIDATx\x9cc\xf8\x00\x00\x00\x01\x00\x01\x00\x00\x00\x00'
|
||||
test_image_b64 = base64.b64encode(test_image_data).decode('utf-8')
|
||||
|
||||
data = {
|
||||
"model": "cohere/embed-v4.0",
|
||||
"input": [test_image_b64],
|
||||
"input_type": "image"
|
||||
}
|
||||
|
||||
if sync_mode:
|
||||
response = embedding(**data)
|
||||
else:
|
||||
response = await litellm.aembedding(**data)
|
||||
|
||||
# Validate response structure for image embedding
|
||||
assert response.model is not None
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]['object'] == 'embedding'
|
||||
assert len(response.data[0]['embedding']) > 0
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
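# Cohere distinguishes embeddings by purpose: documents vs. queries vs.
# classification vs. clustering, selected via `input_type`.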
@pytest.mark.parametrize("input_type", ["search_document", "search_query", "classification", "clustering"])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_input_types(input_type):
|
||||
"""Test Cohere Embed v4 with different input types."""
|
||||
try:
|
||||
response = await litellm.aembedding(
|
||||
model="cohere/embed-v4.0",
|
||||
input=[f"Test text for {input_type}"],
|
||||
input_type=input_type
|
||||
)
|
||||
|
||||
assert response.model is not None
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0]['object'] == 'embedding'
|
||||
assert len(response.data[0]['embedding']) > 0
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
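# encoding_format="float" should produce plain float vectors (checked
# element-wise below).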
def test_cohere_embed_v4_encoding_format():
    """Test Cohere Embed v4 with different encoding formats."""
    try:
        response = embedding(
            model="cohere/embed-v4.0",
            input=["Test encoding format"],
            encoding_format="float",
        )

        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]['object'] == 'embedding'
        assert len(response.data[0]['embedding']) > 0
        # Validate that embeddings are floats
        assert all(isinstance(x, float) for x in response.data[0]['embedding'])
        assert isinstance(response.usage, litellm.Usage)

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


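# Invalid inputs (an empty list, None) are expected to raise rather than
# return an empty response.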
def test_cohere_embed_v4_error_handling():
    """Test error handling for Cohere Embed v4 with invalid inputs."""
    try:
        # Test with empty input - should raise an error
        try:
            response = embedding(
                model="cohere/embed-v4.0",
                input=[],  # Empty input
            )
            pytest.fail("Should have failed with empty input")
        except Exception:
            pass  # Expected to fail

        # Test with None input - should raise an error
        try:
            response = embedding(
                model="cohere/embed-v4.0",
                input=None,
            )
            pytest.fail("Should have failed with None input")
        except Exception:
            pass  # Expected to fail

    except Exception as e:
        pytest.fail(f"Error in error handling test: {e}")


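# Batch embedding: each input should come back as its own item with a
# matching `index`, preserving request order.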
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_cohere_embed_v4_multiple_texts(sync_mode):
|
||||
"""Test Cohere Embed v4 with multiple text inputs."""
|
||||
try:
|
||||
texts = [
|
||||
"The quick brown fox jumps over the lazy dog",
|
||||
"Machine learning is transforming the world",
|
||||
"Python is a versatile programming language",
|
||||
"Natural language processing enables human-computer interaction"
|
||||
]
|
||||
|
||||
data = {
|
||||
"model": "cohere/embed-v4.0",
|
||||
"input": texts,
|
||||
"input_type": "search_document"
|
||||
}
|
||||
|
||||
if sync_mode:
|
||||
response = embedding(**data)
|
||||
else:
|
||||
response = await litellm.aembedding(**data)
|
||||
|
||||
# Validate response structure
|
||||
assert response.model is not None
|
||||
assert len(response.data) == len(texts)
|
||||
|
||||
for i, data_item in enumerate(response.data):
|
||||
assert data_item['object'] == 'embedding'
|
||||
assert data_item['index'] == i
|
||||
assert len(data_item['embedding']) > 0
|
||||
assert all(isinstance(x, float) for x in data_item['embedding'])
|
||||
|
||||
assert isinstance(response.usage, litellm.Usage)
|
||||
assert response.usage.prompt_tokens > 0
|
||||
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
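# Combining `input_type`, `dimensions`, and `encoding_format` in a single
# request; the custom dimension should still be honored.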
def test_cohere_embed_v4_with_optional_params():
    """Test Cohere Embed v4 with various optional parameters."""
    try:
        response = embedding(
            model="cohere/embed-v4.0",
            input=["Test with optional parameters"],
            input_type="search_query",
            dimensions=256,
            encoding_format="float",
        )

        # Validate response
        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]['object'] == 'embedding'
        assert len(response.data[0]['embedding']) == 256  # Custom dimensions
        assert all(isinstance(x, float) for x in response.data[0]['embedding'])
        assert isinstance(response.usage, litellm.Usage)

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")