Added LiteLLM to the stack
Development/litellm/tests/local_testing/test_cost_calc.py (new file, +112)
@@ -0,0 +1,112 @@
import os
import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
import io
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from typing import Literal

import pytest
from pydantic import BaseModel, ConfigDict

import litellm
from litellm import Router, completion_cost, stream_chunk_builder

models = [
    dict(
        model_name="openai/gpt-3.5-turbo",
    ),
    dict(
        model_name="anthropic/claude-3-haiku-20240307",
    ),
    dict(
        model_name="together_ai/meta-llama/Llama-2-7b-chat-hf",
    ),
]

router = Router(
    model_list=[
        {
            "model_name": m["model_name"],
            "litellm_params": {
                "model": m.get("model", m["model_name"]),
            },
        }
        for m in models
    ],
    routing_strategy="simple-shuffle",
    num_retries=3,
    retry_after=1,
    timeout=60.0,
    allowed_fails=2,
    cooldown_time=0,
    debug_level="INFO",
)


@pytest.mark.parametrize(
    "model",
    [
        "openai/gpt-3.5-turbo",
        # "anthropic/claude-3-haiku-20240307",
        # "together_ai/meta-llama/Llama-2-7b-chat-hf",
    ],
)
def test_run(model: str):
    """
    Relevant issue - https://github.com/BerriAI/litellm/issues/4965
    """
    litellm.set_verbose = True
    prompt = "Hi"
    # Custom per-token costs override the model's default pricing.
    kwargs = dict(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.001,
        top_p=0.001,
        max_tokens=20,
        input_cost_per_token=2,
        output_cost_per_token=2,
    )

    print(f"--------- {model} ---------")
    print(f"Prompt: {prompt}")

    # Non-streaming call: the cost is attached to the response directly.
    response = router.completion(**kwargs)  # type: ignore
    non_stream_output = response.choices[0].message.content.replace("\n", "")  # type: ignore
    non_stream_cost_calc = response._hidden_params["response_cost"] * 100

    print(f"Non-stream output: {non_stream_output}")
    print(f"Non-stream usage : {response.usage}")  # type: ignore
    non_stream_usage = response.usage
    try:
        print(
            f"Non-stream cost : {response._hidden_params['response_cost'] * 100:.4f}"
        )
    except TypeError:
        print("Non-stream cost : NONE")
    print(f"Non-stream cost : {completion_cost(response) * 100:.4f} (response)")

    # Streaming call: rebuild the full response from the chunks, then price it.
    response = router.completion(**kwargs, stream=True, stream_options={"include_usage": True})  # type: ignore
    response = stream_chunk_builder(list(response), messages=kwargs["messages"])  # type: ignore
    output = response.choices[0].message.content.replace("\n", "")  # type: ignore

    if response.usage.completion_tokens != non_stream_usage.completion_tokens:
        pytest.skip(
            "LLM API returning inconsistent usage"
        )  # handles transient openai errors
    streaming_cost_calc = completion_cost(response) * 100
    print(f"Stream output : {output}")

    print(f"Stream usage : {response.usage}")  # type: ignore
    print(f"Stream cost : {streaming_cost_calc} (response)")
    print("")
    if output == non_stream_output:
        # assert cost is the same
        assert streaming_cost_calc == non_stream_cost_calc