Added LiteLLM to the stack
This commit is contained in:
@@ -0,0 +1,30 @@
|
||||
# Route OpenAI model names to local Ollama deployments.
model_list:
  - model_name: gpt-3.5-turbo-instruct
    litellm_params:
      model: ollama/zephyr
  - model_name: gpt-4
    litellm_params:
      model: ollama/llama2
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: ollama/llama2
      temperature: 0.1
      max_tokens: 20

# Example: a request addressed to "gpt-4" is answered by ollama/llama2.
# (NOTE: the original example had a trailing comma after the "messages"
# array, which is invalid JSON — removed here.)
# curl --location 'http://0.0.0.0:8000/chat/completions' \
# --header 'Content-Type: application/json' \
# --data ' {
#   "model": "gpt-4",
#   "messages": [
#     {
#       "role": "user",
#       "content": "what llm are you"
#     }
#   ]
# }
# '
#
# Sample response (trailing "%" shell artifact removed):
# {"id":"chatcmpl-27c85cf0-ab09-4bcf-8cb1-0ee950520743","choices":[{"finish_reason":"stop","index":0,"message":{"content":" Hello! I'm just an AI, I don't have personal experiences or emotions like humans do. However, I can help you with any questions or tasks you may have! Is there something specific you'd like to know or discuss?","role":"assistant","_logprobs":null}}],"created":1700094955.373751,"model":"ollama/llama2","object":"chat.completion","system_fingerprint":null,"usage":{"prompt_tokens":12,"completion_tokens":47,"total_tokens":59},"_response_ms":8028.017999999999}
|
@@ -0,0 +1,15 @@
|
||||
# Two Azure deployments exposed under per-team model names.
model_list:
  - model_name: gpt-4-team1
    litellm_params:
      model: azure/chatgpt-v-3
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"  # quoted so it is not parsed as a YAML timestamp
      api_key: os.environ/AZURE_API_KEY
      # was "20_000": YAML 1.2 parsers read that as the *string* "20_000",
      # not an integer — plain digits are unambiguous across parsers.
      tpm: 20000
  - model_name: gpt-4-team2
    litellm_params:
      model: azure/gpt-4
      api_key: os.environ/AZURE_API_KEY
      api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
      tpm: 100000  # was "100_000"; see note above on underscore integers
|
||||
|
@@ -0,0 +1,7 @@
|
||||
model_list:
  - model_name: "openai-model"
    litellm_params:
      model: "gpt-3.5-turbo"

litellm_settings:
  cache: true  # canonical lowercase boolean (was "True")
|
@@ -0,0 +1,11 @@
|
||||
model_list:
|
||||
- model_name: "openai-model"
|
||||
litellm_params:
|
||||
model: "gpt-3.5-turbo"
|
||||
|
||||
litellm_settings:
|
||||
cache: True
|
||||
cache_params:
|
||||
type: "redis"
|
||||
supported_call_types: ["embedding", "aembedding"]
|
||||
host: "os.environ/REDIS_HOST"
|
@@ -0,0 +1,48 @@
|
||||
model_list:
  ##############################################################################
  # Azure
  - model_name: gpt-4o-mini
    litellm_params:
      model: azure/gpt-4o-mini
      api_base: https://amazin-prod.openai.azure.com
      api_key: "os.environ/AZURE_GPT_4O"
      deployment_id: gpt-4o-mini
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_base: https://very-cool-prod.openai.azure.com
      api_key: "os.environ/AZURE_GPT_4O"
      deployment_id: gpt-4o

  ##############################################################################
  # Fireworks
  - model_name: fireworks-llama-v3p1-405b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct
      api_key: "os.environ/FIREWORKS"
  - model_name: fireworks-llama-v3p1-70b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct
      api_key: "os.environ/FIREWORKS"

general_settings:
  alerting_threshold: 300  # sends alerts if requests hang for 5min+ and responses take 5min+

litellm_settings:  # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  # FIX(review): the original declared "success_callback" twice in this mapping
  # (["prometheus"], then ["s3"] further down). Duplicate keys are invalid YAML
  # and most parsers silently keep only the last value, dropping the prometheus
  # callback. Merged into a single list.
  success_callback: ["prometheus", "s3"]
  service_callback: ["prometheus_system"]
  drop_params: false  # Raise an exception if the openai param being passed in isn't supported.
  cache: false
  default_internal_user_params:
    user_role: os.environ/DEFAULT_USER_ROLE

  s3_callback_params:
    s3_bucket_name: logs-bucket-litellm  # AWS Bucket Name for S3
    s3_region_name: us-west-2  # AWS Region Name for S3
    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID  # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY  # AWS Secret Access Key for S3
    s3_path: my-test-path  # [OPTIONAL] set path in bucket you want to write logs to
    s3_endpoint_url: https://s3.amazonaws.com  # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 buckets

router_settings:
  routing_strategy: simple-shuffle  # "simple-shuffle" shown to result in highest throughput. https://docs.litellm.ai/docs/proxy/configs#load-balancing
|
@@ -0,0 +1,5 @@
|
||||
# Pull the model definitions in from a sibling config file.
include:
  - included_models.yaml

litellm_settings:
  callbacks: ["prometheus"]
|
@@ -0,0 +1,5 @@
|
||||
include:
  # NOTE(review): this file does not exist — presumably a negative-test
  # fixture for the include-resolution error path; confirm before "fixing".
  - non-existent-file.yaml

litellm_settings:
  callbacks: ["prometheus"]
|
@@ -0,0 +1,6 @@
|
||||
# Merge model definitions from two sibling config files.
include:
  - models_file_1.yaml
  - models_file_2.yaml

litellm_settings:
  callbacks: ["prometheus"]
|
@@ -0,0 +1,4 @@
|
||||
# Fixture consumed via "include:" from a parent config.
model_list:
  - model_name: included-model
    litellm_params:
      model: gpt-4
|
@@ -0,0 +1,7 @@
|
||||
model_list:
  # NOTE(review): this entry has no litellm_params — cannot tell from here
  # whether that is intentional (e.g. defaults applied) or truncated; confirm.
  - model_name: gpt-3.5-turbo

litellm_settings:
  drop_params: true  # canonical lowercase boolean (was "True")
  success_callback: ["langfuse"]  # https://docs.litellm.ai/docs/observability/langfuse_integration
|
||||
|
@@ -0,0 +1,28 @@
|
||||
litellm_settings:
  drop_params: true  # canonical lowercase boolean (was "True")

# Model-specific settings
model_list:  # use the same model_name for using the litellm router. LiteLLM will use the router between gpt-3.5-turbo
  - model_name: gpt-3.5-turbo  # litellm will
    litellm_params:
      model: gpt-3.5-turbo
      # SECURITY(review): hard-coded API key committed to config (appears
      # truncated). Prefer an env reference (api_key: os.environ/OPENAI_API_KEY)
      # as the other configs in this commit do.
      api_key: sk-uj6F
      tpm: 20000  # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3  # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-Imn  # SECURITY(review): hard-coded key, see note above
      tpm: 20000  # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3  # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openrouter/gpt-3.5-turbo
  - model_name: mistral-7b-instruct
    litellm_params:
      model: mistralai/mistral-7b-instruct

environment_variables:
  REDIS_HOST: localhost
  # NOTE(review): bare values parse as null, not empty strings — confirm the
  # consumer accepts null here, or set explicit values / quote "".
  REDIS_PASSWORD:
  REDIS_PORT:
|
@@ -0,0 +1,4 @@
|
||||
# Fixture consumed via "include:" from a parent config (file 1 of 2).
model_list:
  - model_name: included-model-1
    litellm_params:
      model: gpt-4
|
@@ -0,0 +1,4 @@
|
||||
# Fixture consumed via "include:" from a parent config (file 2 of 2).
model_list:
  - model_name: included-model-2
    litellm_params:
      model: gpt-3.5-turbo
|
@@ -0,0 +1,7 @@
|
||||
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

general_settings:
  otel: true  # canonical lowercase boolean (was "True"); OpenTelemetry Logger this logs OTEL data to your collector
|
@@ -0,0 +1,4 @@
|
||||
# Minimal passthrough config: expose gpt-3.5-turbo under its own name.
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
|
Reference in New Issue
Block a user