Added LiteLLM to the stack
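
Adds async pytest tests that measure the per-request latency overhead LiteLLM
introduces, for both non-streaming and streaming completions across OpenAI and
Bedrock models.
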
@@ -0,0 +1,116 @@
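"""Tests for the latency overhead LiteLLM adds on top of provider API calls.

Each test issues a completion request, reads the overhead LiteLLM reports in
response._hidden_params["litellm_overhead_time_ms"], and checks it against the
wall-clock duration of the whole request.
"""
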
import os
import sys
from datetime import datetime

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model",
    [
        "bedrock/mistral.mistral-7b-instruct-v0:2",
        "openai/gpt-4o",
        "openai/self_hosted",
        "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
    ],
)
async def test_litellm_overhead(model):
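    """Measure LiteLLM's added latency on a non-streaming completion call."""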
    litellm._turn_on_debug()
    start_time = datetime.now()
    if model == "openai/self_hosted":
        response = await litellm.acompletion(
            model=model,
            messages=[{"role": "user", "content": "Hello, world!"}],
            api_base="https://exampleopenaiendpoint-production.up.railway.app/",
        )
    else:
        response = await litellm.acompletion(
            model=model,
            messages=[{"role": "user", "content": "Hello, world!"}],
        )
    end_time = datetime.now()
    total_time_ms = (end_time - start_time).total_seconds() * 1000
    print(response)
    print(response._hidden_params)
    litellm_overhead_ms = response._hidden_params["litellm_overhead_time_ms"]
    # Calculate the percentage of total request time spent inside LiteLLM
    overhead_percent = litellm_overhead_ms * 100 / total_time_ms
    print("##########################\n")
    print("total_time_ms", total_time_ms)
    print("response litellm_overhead_ms", litellm_overhead_ms)
    print("litellm overhead_percent {}%".format(overhead_percent))
    print("##########################\n")
    assert litellm_overhead_ms > 0
    assert litellm_overhead_ms < 1000

    # Latency overhead should be less than the total request time
    assert litellm_overhead_ms < total_time_ms

    # Latency overhead should be under 40% of the total request time
    assert overhead_percent < 40


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model",
    [
        "bedrock/mistral.mistral-7b-instruct-v0:2",
        "openai/gpt-4o",
        "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
        "openai/self_hosted",
    ],
)
async def test_litellm_overhead_stream(model):
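    """Measure LiteLLM's added latency on a streaming completion call."""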
    litellm._turn_on_debug()
    start_time = datetime.now()
    if model == "openai/self_hosted":
        response = await litellm.acompletion(
            model=model,
            messages=[{"role": "user", "content": "Hello, world!"}],
            api_base="https://exampleopenaiendpoint-production.up.railway.app/",
            stream=True,
        )
    else:
        response = await litellm.acompletion(
            model=model,
            messages=[{"role": "user", "content": "Hello, world!"}],
            stream=True,
        )

    # Consume the stream so the full request, including overhead, is timed
    async for chunk in response:
        print(chunk)

    end_time = datetime.now()
    total_time_ms = (end_time - start_time).total_seconds() * 1000
    print(response)
    print(response._hidden_params)
    litellm_overhead_ms = response._hidden_params["litellm_overhead_time_ms"]
    # Calculate the percentage of total request time spent inside LiteLLM
    overhead_percent = litellm_overhead_ms * 100 / total_time_ms
    print("##########################\n")
    print("total_time_ms", total_time_ms)
    print("response litellm_overhead_ms", litellm_overhead_ms)
    print("litellm overhead_percent {}%".format(overhead_percent))
    print("##########################\n")
    assert litellm_overhead_ms > 0
    assert litellm_overhead_ms < 1000

    # Latency overhead should be less than the total request time
    assert litellm_overhead_ms < total_time_ms

    # Latency overhead should be under 40% of the total request time
    assert overhead_percent < 40