Added LiteLLM to the stack

This commit is contained in:
2025-08-18 09:40:50 +00:00
parent 0648c1968c
commit d220b04e32
2682 changed files with 533609 additions and 1 deletion

View File

@@ -0,0 +1,30 @@
# LiteLLM proxy routing config: OpenAI-style model aliases mapped to local
# Ollama deployments. Requests for "gpt-4" etc. are served by the listed
# ollama/* model instead.
model_list:
  - model_name: gpt-3.5-turbo-instruct
    litellm_params:
      model: ollama/zephyr
  - model_name: gpt-4
    litellm_params:
      model: ollama/llama2
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: ollama/llama2
      temperature: 0.1   # low temperature: near-deterministic completions
      max_tokens: 20     # caps response length for this alias
# Example: request addressed to gpt-4, response produced by ollama/llama2
# curl --location 'http://0.0.0.0:8000/chat/completions' \
#   --header 'Content-Type: application/json' \
#   --data ' {
#     "model": "gpt-4",
#     "messages": [
#       {
#         "role": "user",
#         "content": "what llm are you"
#       }
#     ],
#   }
#   '
#
# Sample response:
# {"id":"chatcmpl-27c85cf0-ab09-4bcf-8cb1-0ee950520743","choices":[{"finish_reason":"stop","index":0,"message":{"content":" Hello! I'm just an AI, I don't have personal experiences or emotions like humans do. However, I can help you with any questions or tasks you may have! Is there something specific you'd like to know or discuss?","role":"assistant","_logprobs":null}}],"created":1700094955.373751,"model":"ollama/llama2","object":"chat.completion","system_fingerprint":null,"usage":{"prompt_tokens":12,"completion_tokens":47,"total_tokens":59},"_response_ms":8028.017999999999}

View File

@@ -0,0 +1,15 @@
# Two Azure OpenAI deployments load-balanced behind per-team aliases.
# API keys are resolved from the environment by LiteLLM (os.environ/<VAR>).
model_list:
  - model_name: gpt-4-team1
    litellm_params:
      model: azure/chatgpt-v-3
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"  # quoted so the date is not parsed as a YAML timestamp
      api_key: os.environ/AZURE_API_KEY
      # Plain integers, not 20_000: underscore digit-grouping is a YAML 1.1-only
      # spelling — YAML 1.2 parsers read 20_000 as the string "20_000".
      tpm: 20000  # tokens-per-minute budget for this deployment
  - model_name: gpt-4-team2
    litellm_params:
      model: azure/gpt-4
      api_key: os.environ/AZURE_API_KEY
      api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
      tpm: 100000

View File

@@ -0,0 +1,7 @@
# Minimal config enabling LiteLLM's response cache for one OpenAI model.
model_list:
  - model_name: "openai-model"
    litellm_params:
      model: "gpt-3.5-turbo"
litellm_settings:
  # Canonical lowercase boolean (yamllint `truthy`); `True` parses the same
  # but is non-standard style.
  cache: true

View File

@@ -0,0 +1,11 @@
# Config enabling a Redis-backed cache, restricted to embedding calls.
model_list:
  - model_name: "openai-model"
    litellm_params:
      model: "gpt-3.5-turbo"
litellm_settings:
  # Canonical lowercase boolean (yamllint `truthy`); `True` parses the same
  # but is non-standard style.
  cache: true
  cache_params:
    type: "redis"
    # Only cache (sync and async) embedding calls; chat completions bypass the cache.
    supported_call_types: ["embedding", "aembedding"]
    host: "os.environ/REDIS_HOST"  # resolved from the environment by LiteLLM

View File

@@ -0,0 +1,48 @@
model_list:
  ################################################################################
  # Azure
  - model_name: gpt-4o-mini
    litellm_params:
      model: azure/gpt-4o-mini
      api_base: https://amazin-prod.openai.azure.com
      api_key: "os.environ/AZURE_GPT_4O"
      deployment_id: gpt-4o-mini
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_base: https://very-cool-prod.openai.azure.com
      api_key: "os.environ/AZURE_GPT_4O"
      deployment_id: gpt-4o
  ################################################################################
  # Fireworks
  - model_name: fireworks-llama-v3p1-405b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct
      api_key: "os.environ/FIREWORKS"
  - model_name: fireworks-llama-v3p1-70b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct
      api_key: "os.environ/FIREWORKS"
general_settings:
  alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  # FIX: `success_callback` was declared twice in this mapping (["prometheus"]
  # and later ["s3"]). Duplicate YAML keys are invalid and most parsers keep
  # only the last value, silently dropping the prometheus callback — merged
  # both into a single list.
  success_callback: ["prometheus", "s3"]
  service_callback: ["prometheus_system"]
  drop_params: false # Raise an exception if the openai param being passed in isn't supported.
  cache: false
  default_internal_user_params:
    user_role: os.environ/DEFAULT_USER_ROLE
  s3_callback_params:
    s3_bucket_name: logs-bucket-litellm # AWS Bucket Name for S3
    s3_region_name: us-west-2 # AWS Region Name for S3
    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
    s3_path: my-test-path # [OPTIONAL] set path in bucket you want to write logs to
    s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 buckets
router_settings:
  routing_strategy: simple-shuffle # "simple-shuffle" shown to result in highest throughput. https://docs.litellm.ai/docs/proxy/configs#load-balancing

View File

@@ -0,0 +1,5 @@
# Composes the model list from an external file and enables Prometheus metrics.
include:
  - included_models.yaml  # must exist relative to this config's directory
litellm_settings:
  callbacks: ["prometheus"]

View File

@@ -0,0 +1,5 @@
# NOTE(review): this include targets a file that deliberately does not exist —
# presumably a fixture for testing LiteLLM's missing-include error handling;
# confirm before using this config in a real deployment.
include:
  - non-existent-file.yaml
litellm_settings:
  callbacks: ["prometheus"]

View File

@@ -0,0 +1,6 @@
# Composes the model list from two external files and enables Prometheus metrics.
include:
  - models_file_1.yaml
  - models_file_2.yaml
litellm_settings:
  callbacks: ["prometheus"]

View File

@@ -0,0 +1,4 @@
# Model fragment pulled in via `include:` from a parent config.
model_list:
  - model_name: included-model
    litellm_params:
      model: gpt-4

View File

@@ -0,0 +1,7 @@
# Routes gpt-3.5-turbo and logs successful calls to Langfuse.
# NOTE(review): this model entry has no `litellm_params` block, unlike every
# other config in this commit — possibly intentional (alias passthrough), but
# verify LiteLLM accepts an entry without it.
model_list:
  - model_name: gpt-3.5-turbo
litellm_settings:
  drop_params: True
  success_callback: ["langfuse"] # https://docs.litellm.ai/docs/observability/langfuse_integration

View File

@@ -0,0 +1,28 @@
litellm_settings:
  # Canonical lowercase boolean (yamllint `truthy`); `True` parses the same.
  drop_params: true
# Model-specific settings
model_list: # use the same model_name for using the litellm router. LiteLLM will use the router between gpt-3.5-turbo
  - model_name: gpt-3.5-turbo # litellm will
    litellm_params:
      model: gpt-3.5-turbo
      # NOTE(review): hard-coded key fragment committed to VCS — prefer
      # api_key: os.environ/OPENAI_API_KEY.
      api_key: sk-uj6F
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-Imn # NOTE(review): hard-coded key fragment — see above
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openrouter/gpt-3.5-turbo
  - model_name: mistral-7b-instruct
    litellm_params:
      model: mistralai/mistral-7b-instruct
environment_variables:
  REDIS_HOST: localhost
  # FIX: these were bare keys, which YAML parses as null; values exported to
  # the process environment must be strings. Fill in real values before use.
  REDIS_PASSWORD: ""
  REDIS_PORT: ""

View File

@@ -0,0 +1,4 @@
# Model fragment pulled in via `include:` from a parent config.
model_list:
  - model_name: included-model-1
    litellm_params:
      model: gpt-4

View File

@@ -0,0 +1,4 @@
# Model fragment pulled in via `include:` from a parent config.
model_list:
  - model_name: included-model-2
    litellm_params:
      model: gpt-3.5-turbo

View File

@@ -0,0 +1,7 @@
# Config enabling OpenTelemetry export for proxy traffic.
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
general_settings:
  # Canonical lowercase boolean (yamllint `truthy`); `True` parses the same
  # but is non-standard style.
  otel: true # OpenTelemetry Logger this logs OTEL data to your collector

View File

@@ -0,0 +1,4 @@
# Minimal proxy config: a single OpenAI model with no extra settings.
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo