Removed Development/ directory from repository

1 .gitignore (vendored)
@@ -10,6 +10,7 @@

# Service configuration and data directories
Services/
Development/

# Docker volumes and bind mounts from external paths
docker-local/

(File diff suppressed because it is too large.)
@@ -1,17 +0,0 @@
# used by CI/CD testing
openai==1.99.5
python-dotenv
tiktoken
importlib_metadata
cohere
redis==5.2.1
redisvl==0.4.1
anthropic
orjson==3.10.12 # fast /embedding responses
pydantic==2.10.2
google-cloud-aiplatform==1.43.0
google-cloud-iam==2.19.1
fastapi-sso==0.16.0
uvloop==0.21.0
mcp==1.10.1 # for MCP server
semantic_router==0.1.10 # for auto-routing with litellm
@@ -1,52 +0,0 @@
{
    "name": "Python 3.11",
    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
    "image": "mcr.microsoft.com/devcontainers/python:3.11-bookworm",
    // https://github.com/devcontainers/images/tree/main/src/python
    // https://mcr.microsoft.com/en-us/product/devcontainers/python/tags

    // "build": {
    //     "dockerfile": "Dockerfile",
    //     "context": ".."
    // },

    // Features to add to the dev container. More info: https://containers.dev/features.
    // "features": {},

    // Configure tool-specific properties.
    "customizations": {
        // Configure properties specific to VS Code.
        "vscode": {
            "settings": {},
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance",
                "GitHub.copilot",
                "GitHub.copilot-chat",
                "ms-python.autopep8"
            ]
        }
    },

    // Use 'forwardPorts' to make a list of ports inside the container available locally.
    "forwardPorts": [4000],

    "containerEnv": {
        "LITELLM_LOG": "DEBUG"
    },

    // Use 'portsAttributes' to set default properties for specific forwarded ports.
    // More info: https://containers.dev/implementors/json_reference/#port-attributes
    "portsAttributes": {
        "4000": {
            "label": "LiteLLM Server",
            "onAutoForward": "notify"
        }
    },

    // More info: https://aka.ms/dev-containers-non-root.
    // "remoteUser": "litellm",

    // Use 'postCreateCommand' to run commands after the container is created.
    "postCreateCommand": "pipx install poetry && poetry install -E extra_proxy -E proxy"
}
@@ -1,31 +0,0 @@
# OpenAI
OPENAI_API_KEY = ""
OPENAI_BASE_URL = ""
# Cohere
COHERE_API_KEY = ""
# OpenRouter
OR_SITE_URL = ""
OR_APP_NAME = "LiteLLM Example app"
OR_API_KEY = ""
# Azure API base URL
AZURE_API_BASE = ""
# Azure API version
AZURE_API_VERSION = ""
# Azure API key
AZURE_API_KEY = ""
# Replicate
REPLICATE_API_KEY = ""
REPLICATE_API_TOKEN = ""
# Anthropic
ANTHROPIC_API_KEY = ""
# Infisical
INFISICAL_TOKEN = ""
# Novita AI
NOVITA_API_KEY = ""
# INFINITY
INFINITY_API_KEY = ""

# Development Configs
LITELLM_MASTER_KEY = "sk-1234"
DATABASE_URL = "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
STORE_MODEL_IN_DB = "True"
@@ -1,46 +0,0 @@
[flake8]
ignore =
    # The following ignores can be removed when formatting using black
    W191,W291,W292,W293,W391,W504
    E101,E111,E114,E116,E117,E121,E122,E123,E124,E125,E126,E127,E128,E129,E131,
    E201,E202,E221,E222,E225,E226,E231,E241,E251,E252,E261,E265,E271,E272,E275,
    E301,E302,E303,E305,E306,
    # line break before binary operator
    W503,
    # inline comment should start with '# '
    E262,
    # too many leading '#' for block comment
    E266,
    # multiple imports on one line
    E401,
    # module level import not at top of file
    E402,
    # Line too long (82 > 79 characters)
    E501,
    # comparison to None should be 'if cond is None:'
    E711,
    # comparison to True should be 'if cond is True:' or 'if cond:'
    E712,
    # do not compare types, for exact checks use `is` / `is not`, for instance checks use `isinstance()`
    E721,
    # do not use bare 'except'
    E722,
    # x is imported but unused
    F401,
    # 'from . import *' used; unable to detect undefined names
    F403,
    # x may be undefined, or defined from star imports:
    F405,
    # f-string is missing placeholders
    F541,
    # dictionary key '' repeated with different values
    F601,
    # redefinition of unused x from line 123
    F811,
    # undefined name x
    F821,
    # local variable x is assigned to but never used
    F841,

# https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8
extend-ignore = E203
@@ -1,10 +0,0 @@
# Add the commit hash of any commit you want to ignore in `git blame` here.
# One commit hash per line.
#
# The GitHub Blame UI will use this file automatically!
#
# Run this command to always ignore formatting commits in `git blame`
# git config blame.ignoreRevsFile .git-blame-ignore-revs

# Update pydantic code to fix warnings (GH-3600)
876840e9957bc7e9f7d6a2b58c4d7c53dad16481
1 Development/litellm/.gitattributes (vendored)
@@ -1 +0,0 @@
*.ipynb linguist-vendored
13 Development/litellm/.github/FUNDING.yml (vendored)
@@ -1,13 +0,0 @@
# These are supported funding model platforms

github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: https://buy.stripe.com/9AQ03Kd3P91o0Q8bIS
@@ -1,49 +0,0 @@
name: Bug Report
description: File a bug report
title: "[Bug]: "
labels: ["bug"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
  - type: textarea
    id: what-happened
    attributes:
      label: What happened?
      description: Also tell us, what did you expect to happen?
      placeholder: Tell us what you see!
      value: "A bug happened!"
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
  - type: dropdown
    id: ml-ops-team
    attributes:
      label: Are you a ML Ops Team?
      description: This helps us prioritize your requests correctly
      options:
        - "No"
        - "Yes"
    validations:
      required: true
  - type: input
    id: version
    attributes:
      label: What LiteLLM version are you on ?
      placeholder: v1.53.1
    validations:
      required: true
  - type: input
    id: contact
    attributes:
      label: Twitter / LinkedIn details
      description: We announce new features on Twitter + LinkedIn. If this issue leads to an announcement, and you'd like a mention, we'll gladly shout you out!
      placeholder: ex. @krrish_dh / https://www.linkedin.com/in/krish-d/
    validations:
      required: false
@@ -1,8 +0,0 @@
blank_issues_enabled: true
contact_links:
  - name: Schedule Demo
    url: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat
    about: Speak directly with Krrish and Ishaan, the founders, to discuss issues, share feedback, or explore improvements for LiteLLM
  - name: Discord
    url: https://discord.com/invite/wuPM9dRgDw
    about: Join 250+ LiteLLM community members!
@@ -1,42 +0,0 @@
name: 🚀 Feature Request
description: Submit a proposal/request for a new LiteLLM feature.
title: "[Feature]: "
labels: ["enhancement"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for making LiteLLM better!
  - type: textarea
    id: the-feature
    attributes:
      label: The Feature
      description: A clear and concise description of the feature proposal
      placeholder: Tell us what you want!
    validations:
      required: true
  - type: textarea
    id: motivation
    attributes:
      label: Motivation, pitch
      description: Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., "I'm working on X and would like Y to be possible". If this is related to another GitHub issue, please link here too.
    validations:
      required: true
  - type: dropdown
    id: hiring-interest
    attributes:
      label: LiteLLM is hiring a founding backend engineer, are you interested in joining us and shipping to all our users?
      description: If yes, apply here - https://www.ycombinator.com/companies/litellm/jobs/6uvoBp3-founding-backend-engineer
      options:
        - "No"
        - "Yes"
    validations:
      required: true
  - type: input
    id: contact
    attributes:
      label: Twitter / LinkedIn details
      description: We announce new features on Twitter + LinkedIn. When this is announced, and you'd like a mention, we'll gladly shout you out!
      placeholder: ex. @krrish_dh / https://www.linkedin.com/in/krish-d/
    validations:
      required: false
@@ -1,77 +0,0 @@
name: Helm OCI Chart Releaser
description: Push Helm charts to OCI-based (Docker) registries
author: sergeyshaykhullin
branding:
  color: yellow
  icon: upload-cloud
inputs:
  name:
    required: true
    description: Chart name
  repository:
    required: true
    description: Chart repository name
  tag:
    required: true
    description: Chart version
  app_version:
    required: true
    description: App version
  path:
    required: false
    description: Chart path (Default 'charts/{name}')
  registry:
    required: true
    description: OCI registry
  registry_username:
    required: true
    description: OCI registry username
  registry_password:
    required: true
    description: OCI registry password
  update_dependencies:
    required: false
    default: 'false'
    description: Update chart dependencies before packaging (Default 'false')
outputs:
  image:
    value: ${{ steps.output.outputs.image }}
    description: Chart image (Default '{registry}/{repository}/{image}:{tag}')
runs:
  using: composite
  steps:
    - name: Helm | Login
      shell: bash
      run: echo ${{ inputs.registry_password }} | helm registry login -u ${{ inputs.registry_username }} --password-stdin ${{ inputs.registry }}
      env:
        HELM_EXPERIMENTAL_OCI: '1'

    - name: Helm | Dependency
      if: inputs.update_dependencies == 'true'
      shell: bash
      run: helm dependency update ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
      env:
        HELM_EXPERIMENTAL_OCI: '1'

    - name: Helm | Package
      shell: bash
      run: helm package ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }} --version ${{ inputs.tag }} --app-version ${{ inputs.app_version }}
      env:
        HELM_EXPERIMENTAL_OCI: '1'

    - name: Helm | Push
      shell: bash
      run: helm push ${{ inputs.name }}-${{ inputs.tag }}.tgz oci://${{ inputs.registry }}/${{ inputs.repository }}
      env:
        HELM_EXPERIMENTAL_OCI: '1'

    - name: Helm | Logout
      shell: bash
      run: helm registry logout ${{ inputs.registry }}
      env:
        HELM_EXPERIMENTAL_OCI: '1'

    - name: Helm | Output
      id: output
      shell: bash
      run: echo "image=${{ inputs.registry }}/${{ inputs.repository }}/${{ inputs.name }}:${{ inputs.tag }}" >> $GITHUB_OUTPUT
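For context, the composite action above is consumed from other workflows in this same diff. A minimal calling sketch (illustrative only; the literal input values below are assumptions, not part of the deleted files — the real workflows pass env variables and step outputs instead):

```yaml
# Hypothetical step invoking the helm-oci-chart-releaser composite action shown above.
# Placeholder values; see the ghcr_deploy workflow later in this diff for the real inputs.
- uses: ./.github/actions/helm-oci-chart-releaser
  with:
    name: litellm-helm                       # chart name
    repository: my-org                       # repository inside the OCI registry (assumed)
    tag: 0.1.1                               # chart version to publish (assumed)
    app_version: v1.74.10                    # app version embedded in the chart (assumed)
    path: deploy/charts/litellm-helm         # defaults to charts/{name} when omitted
    registry: ghcr.io
    registry_username: ${{ github.actor }}
    registry_password: ${{ secrets.GITHUB_TOKEN }}
    update_dependencies: true                # runs `helm dependency update` before packaging
```

The ghcr_deploy and ghcr_helm_deploy workflows further down pass the same inputs via `env.*` values and step outputs.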
10 Development/litellm/.github/dependabot.yaml (vendored)
@@ -1,10 +0,0 @@
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "daily"
    groups:
      github-actions:
        patterns:
          - "*"
BIN Development/litellm/.github/deploy-to-aws.png (vendored)
Binary file not shown. (Before: 2.8 KiB)
@@ -1,33 +0,0 @@
## Title

<!-- e.g. "Implement user authentication feature" -->

## Relevant issues

<!-- e.g. "Fixes #000" -->

## Pre-Submission checklist

**Please complete all items before asking a LiteLLM maintainer to review your PR**

- [ ] I have Added testing in the [`tests/litellm/`](https://github.com/BerriAI/litellm/tree/main/tests/litellm) directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code)
- [ ] I have added a screenshot of my new test passing locally
- [ ] My PR passes all unit tests on [`make test-unit`](https://docs.litellm.ai/docs/extras/contributing_code)
- [ ] My PR's scope is as isolated as possible, it only solves 1 specific problem

## Type

<!-- Select the type of Pull Request -->
<!-- Keep only the necessary ones -->

🆕 New Feature
🐛 Bug Fix
🧹 Refactoring
📖 Documentation
🚄 Infrastructure
✅ Test

## Changes

133 Development/litellm/.github/scripts/scan_keywords.py (vendored)
@@ -1,133 +0,0 @@
#!/usr/bin/env python3
import json
import os
import sys
import urllib.request
import urllib.error


def read_event_payload() -> dict:
    event_path = os.environ.get("GITHUB_EVENT_PATH")
    if not event_path or not os.path.exists(event_path):
        return {}
    with open(event_path, "r", encoding="utf-8") as f:
        return json.load(f)


def get_issue_text(event: dict) -> tuple[str, str, int, str, str]:
    issue = event.get("issue") or {}
    title = (issue.get("title") or "").strip()
    body = (issue.get("body") or "").strip()
    number = issue.get("number") or 0
    html_url = issue.get("html_url") or ""
    author = ((issue.get("user") or {}).get("login") or "").strip()
    return title, body, number, html_url, author


def detect_keywords(text: str, keywords: list[str]) -> list[str]:
    lowered = text.lower()
    matches = []
    for keyword in keywords:
        k = keyword.strip().lower()
        if not k:
            continue
        if k in lowered:
            matches.append(keyword.strip())
    # Deduplicate while preserving order
    seen = set()
    unique_matches = []
    for m in matches:
        if m not in seen:
            unique_matches.append(m)
            seen.add(m)
    return unique_matches


def send_webhook(webhook_url: str, payload: dict) -> None:
    if not webhook_url:
        return
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        webhook_url,
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            resp.read()
    except urllib.error.HTTPError as e:
        print(f"Webhook HTTP error: {e.code} {e.reason}", file=sys.stderr)
    except urllib.error.URLError as e:
        print(f"Webhook URL error: {e.reason}", file=sys.stderr)
    except Exception as e:
        print(f"Webhook unexpected error: {e}", file=sys.stderr)


def _excerpt(text: str, max_len: int = 400) -> str:
    if not text:
        return ""

    # Keep original formatting
    if len(text) <= max_len:
        return text
    return text[: max_len - 1] + "…"


def main() -> int:
    event = read_event_payload()
    if not event:
        print("::warning::No event payload found; exiting without labeling.")
        return 0

    # Read issue details
    title, body, number, html_url, author = get_issue_text(event)
    combined_text = f"{title}\n\n{body}".strip()

    # Keywords from env or defaults
    keywords_env = os.environ.get("KEYWORDS", "")
    default_keywords = ["azure", "openai", "bedrock", "vertexai", "vertex ai", "anthropic"]
    keywords = [k.strip() for k in keywords_env.split(",")] if keywords_env else default_keywords

    matches = detect_keywords(combined_text, keywords)
    found = bool(matches)

    # Emit outputs
    github_output = os.environ.get("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a", encoding="utf-8") as fh:
            fh.write(f"found={'true' if found else 'false'}\n")
            fh.write(f"matches={','.join(matches)}\n")

    # Optional webhook notification
    webhook_url = os.environ.get("PROVIDER_ISSUE_WEBHOOK_URL", "").strip()
    if found and webhook_url:
        repo_full = (event.get("repository") or {}).get("full_name", "")
        title_part = f"*{title}*" if title else "New issue"
        author_part = f" by @{author}" if author else ""
        body_preview = _excerpt(body)
        preview_block = f"\n{body_preview}" if body_preview else ""
        payload = {
            "text": (
                f"New issue 🚨\n"
                f"{title_part}\n\n{preview_block}\n"
                f"<{html_url}|View issue>\n"
                f"Author: {author}"
            )
        }
        send_webhook(webhook_url, payload)

    # Print a short log line for Actions UI
    if found:
        print(f"Detected provider keywords: {', '.join(matches)}")
    else:
        print("No provider keywords detected.")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
94 Development/litellm/.github/template.yaml (vendored)
@@ -1,94 +0,0 @@
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: >
  llmlite-service

  SAM Template for llmlite-service

# More info about Globals: https://github.com/awslabs/serverless-application-model/blob/master/docs/globals.rst
Globals:
  Function:
    Timeout: 600
    MemorySize: 128
    Environment:
      Variables:
        WORKER_CONFIG: !Ref WorkerConfigParameter

Parameters:
  AliasParameter:
    Type: String
    Default: live
  WorkerConfigParameter:
    Type: String
    Description: Sample environment variable
    Default: '{"model": null, "alias": null, "api_base": null, "api_version": "2023-07-01-preview", "debug": false, "temperature": null, "max_tokens": null, "request_timeout": 600, "max_budget": null, "telemetry": true, "drop_params": false, "add_function_to_prompt": false, "headers": null, "save": false, "config": null, "use_queue": false}'

Resources:
  MyUrlFunctionPermissions:
    Type: AWS::Lambda::Permission
    Properties:
      FunctionName: !Ref URL
      Action: lambda:InvokeFunctionUrl
      Principal: "*"
      FunctionUrlAuthType: NONE

  Function:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Sub "${AWS::StackName}-function"
      CodeUri: "./litellm"
      Handler: proxy/lambda.handler
      Runtime: python3.11
      AutoPublishAlias: !Ref AliasParameter
      Architectures:
        - x86_64
      DeploymentPreference:
        Type: AllAtOnce
        Alarms:
          - !Ref NewVersionErrorMetricGreaterThanZeroAlarm

  NewVersionErrorMetricGreaterThanZeroAlarm:
    Type: "AWS::CloudWatch::Alarm"
    Properties:
      AlarmDescription: Lambda Function Error > 0
      ComparisonOperator: GreaterThanThreshold
      Dimensions:
        - Name: Resource
          Value: !Sub "${Function}:live"
        - Name: FunctionName
          Value: !Ref Function
        - Name: ExecutedVersion
          Value: !GetAtt Function.Version.Version
      EvaluationPeriods: 1
      Unit: Count
      MetricName: Errors
      Namespace: AWS/Lambda
      Period: 60
      Statistic: Sum
      Threshold: 0

  URL:
    Type: AWS::Lambda::Url
    DependsOn: FunctionAliaslive
    Properties:
      AuthType: NONE
      Qualifier: live
      TargetFunctionArn: !GetAtt Function.Arn

Outputs:
  FunctionARN:
    Description: "Lambda Function ARN"
    Value: !GetAtt Function.Arn

  FunctionUrl:
    Description: "Lambda Function URL Endpoint"
    Value:
      Fn::GetAtt: URL.FunctionUrl

  FunctionVersion:
    Description: "Lambda Function Version"
    Value: !GetAtt Function.Version.Version

  FunctionNewAlarmARN:
    Description: "Lambda Function New Alarm ARN"
    Value: !GetAtt NewVersionErrorMetricGreaterThanZeroAlarm.Arn
35 Development/litellm/.github/workflows/README.md (vendored)
@@ -1,35 +0,0 @@
# Simple PyPI Publishing

A GitHub workflow to manually publish LiteLLM packages to PyPI with a specified version.

## How to Use

1. Go to the **Actions** tab in the GitHub repository
2. Select **Simple PyPI Publish** from the workflow list
3. Click **Run workflow**
4. Enter the version to publish (e.g., `1.74.10`)

## What the Workflow Does

1. **Updates** the version in `pyproject.toml`
2. **Copies** the model prices backup file
3. **Builds** the Python package
4. **Publishes** to PyPI

## Prerequisites

Make sure the following secret is configured in the repository:
- `PYPI_PUBLISH_PASSWORD`: PyPI API token for authentication

## Example Usage

- Version: `1.74.11` → Publishes as v1.74.11
- Version: `1.74.10-hotfix1` → Publishes as v1.74.10-hotfix1

## Features

- ✅ Manual trigger with version input
- ✅ Automatic version updates in `pyproject.toml`
- ✅ Repository safety check (only runs on official repo)
- ✅ Clean package building and publishing
- ✅ Success confirmation with PyPI package link
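The publishing workflow that this README describes is not included in this diff, so the following is only a rough sketch of what such a manually triggered job might look like. The job layout, build commands, and version-bump `sed` are assumptions; only the trigger style, the `PYPI_PUBLISH_PASSWORD` secret, and the repository safety check come from the README above.

```yaml
# Hypothetical sketch of the "Simple PyPI Publish" workflow described in the README (not the actual file).
name: Simple PyPI Publish
on:
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish, e.g. 1.74.10"
        required: true
        type: string
jobs:
  publish:
    if: github.repository == 'BerriAI/litellm'   # repository safety check from the README
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Update version in pyproject.toml    # README step 1 (command is an assumption)
        run: sed -i 's/^version = ".*"/version = "${{ github.event.inputs.version }}"/' pyproject.toml
      - name: Build package                        # README step 3
        run: |
          pip install build
          python -m build
      - name: Publish to PyPI                      # README step 4, using the documented secret
        run: |
          pip install twine
          twine upload dist/* -u __token__ -p "${{ secrets.PYPI_PUBLISH_PASSWORD }}"
```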
@@ -1,28 +0,0 @@
name: Updates model_prices_and_context_window.json and Create Pull Request

on:
  schedule:
    - cron: "0 0 * * 0" # Run every Sunday at midnight
    #- cron: "0 0 * * *" # Run daily at midnight

jobs:
  auto_update_price_and_context_window:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Install Dependencies
        run: |
          pip install aiohttp
      - name: Update JSON Data
        run: |
          python ".github/workflows/auto_update_price_and_context_window_file.py"
      - name: Create Pull Request
        run: |
          git add model_prices_and_context_window.json
          git commit -m "Update model_prices_and_context_window.json file: $(date +'%Y-%m-%d')"
          gh pr create --title "Update model_prices_and_context_window.json file" \
            --body "Automated update for model_prices_and_context_window.json" \
            --head auto-update-price-and-context-window-$(date +'%Y-%m-%d') \
            --base main
        env:
          GH_TOKEN: ${{ secrets.GH_TOKEN }}
@@ -1,121 +0,0 @@
import asyncio
import aiohttp
import json

# Asynchronously fetch data from a given URL
async def fetch_data(url):
    try:
        # Create an asynchronous session
        async with aiohttp.ClientSession() as session:
            # Send a GET request to the URL
            async with session.get(url) as resp:
                # Raise an error if the response status is not OK
                resp.raise_for_status()
                # Parse the response JSON
                resp_json = await resp.json()
                print("Fetch the data from URL.")
                # Return the 'data' field from the JSON response
                return resp_json['data']
    except Exception as e:
        # Print an error message if fetching data fails
        print("Error fetching data from URL:", e)
        return None

# Synchronize local data with remote data
def sync_local_data_with_remote(local_data, remote_data):
    # Update existing keys in local_data with values from remote_data
    for key in (set(local_data) & set(remote_data)):
        local_data[key].update(remote_data[key])

    # Add new keys from remote_data to local_data
    for key in (set(remote_data) - set(local_data)):
        local_data[key] = remote_data[key]

# Write data to the json file
def write_to_file(file_path, data):
    try:
        # Open the file in write mode
        with open(file_path, "w") as file:
            # Dump the data as JSON into the file
            json.dump(data, file, indent=4)
        print("Values updated successfully.")
    except Exception as e:
        # Print an error message if writing to file fails
        print("Error updating JSON file:", e)

# Update the existing models and add the missing models
def transform_remote_data(data):
    transformed = {}
    for row in data:
        # Add the fields 'max_tokens' and 'input_cost_per_token'
        obj = {
            "max_tokens": row["context_length"],
            "input_cost_per_token": float(row["pricing"]["prompt"]),
        }

        # Add 'max_output_tokens' as a field if it is not None
        if "top_provider" in row and "max_completion_tokens" in row["top_provider"] and row["top_provider"]["max_completion_tokens"] is not None:
            obj['max_output_tokens'] = int(row["top_provider"]["max_completion_tokens"])

        # Add the field 'output_cost_per_token'
        obj.update({
            "output_cost_per_token": float(row["pricing"]["completion"]),
        })

        # Add field 'input_cost_per_image' if it exists and is non-zero
        if "pricing" in row and "image" in row["pricing"] and float(row["pricing"]["image"]) != 0.0:
            obj['input_cost_per_image'] = float(row["pricing"]["image"])

        # Add the fields 'litellm_provider' and 'mode'
        obj.update({
            "litellm_provider": "openrouter",
            "mode": "chat"
        })

        # Add the 'supports_vision' field if the modality is 'multimodal'
        if row.get('architecture', {}).get('modality') == 'multimodal':
            obj['supports_vision'] = True

        # Use a composite key to store the transformed object
        transformed[f'openrouter/{row["id"]}'] = obj

    return transformed


# Load local data from a specified file
def load_local_data(file_path):
    try:
        # Open the file in read mode
        with open(file_path, "r") as file:
            # Load and return the JSON data
            return json.load(file)
    except FileNotFoundError:
        # Print an error message if the file is not found
        print("File not found:", file_path)
        return None
    except json.JSONDecodeError as e:
        # Print an error message if JSON decoding fails
        print("Error decoding JSON:", e)
        return None

def main():
    local_file_path = "model_prices_and_context_window.json"  # Path to the local data file
    url = "https://openrouter.ai/api/v1/models"  # URL to fetch remote data

    # Load local data from file
    local_data = load_local_data(local_file_path)
    # Fetch remote data asynchronously
    remote_data = asyncio.run(fetch_data(url))
    # Transform the fetched remote data
    remote_data = transform_remote_data(remote_data)

    # If both local and remote data are available, synchronize and save
    if local_data and remote_data:
        sync_local_data_with_remote(local_data, remote_data)
        write_to_file(local_file_path, local_data)
    else:
        print("Failed to fetch model data from either local file or URL.")

# Entry point of the script
if __name__ == "__main__":
    main()
@@ -1,440 +0,0 @@
# this workflow is triggered by an API call when there is a new PyPI release of LiteLLM
name: Build, Publish LiteLLM Docker Image. New Release
on:
  workflow_dispatch:
    inputs:
      tag:
        description: "The tag version you want to build"
      release_type:
        description: "The release type you want to build. Can be 'latest', 'stable', 'dev', 'rc'"
        type: string
        default: "latest"
      commit_hash:
        description: "Commit hash"
        required: true

# Defines two custom environment variables for the workflow. Used for the Container registry domain, and a name for the Docker image that this workflow builds.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
  CHART_NAME: litellm-helm

# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
jobs:
  # print commit hash, tag, and release type
  print:
    runs-on: ubuntu-latest
    steps:
      - run: |
          echo "Commit hash: ${{ github.event.inputs.commit_hash }}"
          echo "Tag: ${{ github.event.inputs.tag }}"
          echo "Release type: ${{ github.event.inputs.release_type }}"
  docker-hub-deploy:
    if: github.repository == 'BerriAI/litellm'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.commit_hash }}
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }}
      - name: Build and push litellm-database image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: ./docker/Dockerfile.database
          tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
      - name: Build and push litellm-spend-logs image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: ./litellm-js/spend-logs/Dockerfile
          tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
      - name: Build and push litellm-non_root image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: ./docker/Dockerfile.non_root
          tags: litellm/litellm-non_root:${{ github.event.inputs.tag || 'latest' }}
  build-and-push-image:
    runs-on: ubuntu-latest
    # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.commit_hash }}
      # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
      # Configure multi platform Docker builds
      - name: Set up QEMU
        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
      # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
      # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
      # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
      - name: Build and push Docker image
        uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
        with:
          context: .
          push: true
          tags: |
            ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
            ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }}
            ${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm:main-stable', env.REGISTRY) || '' }},
            ${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm:{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
          labels: ${{ steps.meta.outputs.labels }}
          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8

  build-and-push-image-ee:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for EE Dockerfile
        id: meta-ee
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-ee
      # Configure multi platform Docker builds
      - name: Set up QEMU
        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345

      - name: Build and push EE Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          file: Dockerfile
          push: true
          tags: |
            ${{ steps.meta-ee.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
            ${{ steps.meta-ee.outputs.tags }}-${{ github.event.inputs.release_type }}
            ${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-ee:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-ee:main-stable', env.REGISTRY) || '' }}
          labels: ${{ steps.meta-ee.outputs.labels }}
          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8

  build-and-push-image-database:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for database Dockerfile
        id: meta-database
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-database
      # Configure multi platform Docker builds
      - name: Set up QEMU
        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345

      - name: Build and push Database Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          file: ./docker/Dockerfile.database
          push: true
          tags: |
            ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
            ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
            ${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-database:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-database:main-stable', env.REGISTRY) || '' }}
          labels: ${{ steps.meta-database.outputs.labels }}
          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8

  build-and-push-image-non_root:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for non_root Dockerfile
        id: meta-non_root
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-non_root
      # Configure multi platform Docker builds
      - name: Set up QEMU
        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345

      - name: Build and push non_root Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          file: ./docker/Dockerfile.non_root
          push: true
          tags: |
            ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
            ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.release_type }}
            ${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-non_root:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-non_root:main-stable', env.REGISTRY) || '' }}
          labels: ${{ steps.meta-non_root.outputs.labels }}
          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8

  build-and-push-image-spend-logs:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.commit_hash }}

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for spend-logs Dockerfile
        id: meta-spend-logs
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
      # Configure multi platform Docker builds
      - name: Set up QEMU
        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345

      - name: Build and push Database Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          file: ./litellm-js/spend-logs/Dockerfile
          push: true
          tags: |
            ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
            ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
            ${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-spend_logs:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-spend_logs:main-stable', env.REGISTRY) || '' }}
          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8

  build-and-push-helm-chart:
    if: github.event.inputs.release_type != 'dev'
    needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: lowercase github.repository_owner
        run: |
          echo "REPO_OWNER=`echo ${{github.repository_owner}} | tr '[:upper:]' '[:lower:]'`" >>${GITHUB_ENV}

      - name: Get LiteLLM Latest Tag
        id: current_app_tag
        shell: bash
        run: |
          LATEST_TAG=$(git describe --tags --exclude "*dev*" --abbrev=0)
          if [ -z "${LATEST_TAG}" ]; then
            echo "latest_tag=latest" | tee -a $GITHUB_OUTPUT
          else
            echo "latest_tag=${LATEST_TAG}" | tee -a $GITHUB_OUTPUT
          fi

      - name: Get last published chart version
        id: current_version
        shell: bash
        run: |
          CHART_LIST=$(helm show chart oci://${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/${{ env.CHART_NAME }} 2>/dev/null || true)
          if [ -z "${CHART_LIST}" ]; then
            echo "current-version=0.1.0" | tee -a $GITHUB_OUTPUT
          else
            printf '%s' "${CHART_LIST}" | grep '^version:' | awk 'BEGIN{FS=":"}{print "current-version="$2}' | tr -d " " | tee -a $GITHUB_OUTPUT
          fi
        env:
          HELM_EXPERIMENTAL_OCI: '1'

      # Automatically update the helm chart version one "patch" level
      - name: Bump release version
        id: bump_version
        uses: christian-draeger/increment-semantic-version@1.1.0
        with:
          current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
          version-fragment: 'bug'

      - uses: ./.github/actions/helm-oci-chart-releaser
        with:
          name: ${{ env.CHART_NAME }}
          repository: ${{ env.REPO_OWNER }}
          tag: ${{ github.event.inputs.chartVersion || steps.bump_version.outputs.next-version || '0.1.0' }}
          app_version: ${{ steps.current_app_tag.outputs.latest_tag }}
          path: deploy/charts/${{ env.CHART_NAME }}
          registry: ${{ env.REGISTRY }}
          registry_username: ${{ github.actor }}
          registry_password: ${{ secrets.GITHUB_TOKEN }}
          update_dependencies: true

  release:
    name: "New LiteLLM Release"
    needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]

    runs-on: "ubuntu-latest"

    steps:
      - name: Display version
        run: echo "Current version is ${{ github.event.inputs.tag }}"
      - name: "Set Release Tag"
        run: echo "RELEASE_TAG=${{ github.event.inputs.tag }}" >> $GITHUB_ENV
      - name: Display release tag
        run: echo "RELEASE_TAG is $RELEASE_TAG"
      - name: "Create release"
        uses: "actions/github-script@v6"
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          script: |
            const commitHash = "${{ github.event.inputs.commit_hash}}";
            console.log("Commit Hash:", commitHash); // Add this line for debugging
            try {
              const response = await github.rest.repos.createRelease({
                draft: false,
                generate_release_notes: true,
                target_commitish: commitHash,
                name: process.env.RELEASE_TAG,
                owner: context.repo.owner,
                prerelease: false,
                repo: context.repo.repo,
                tag_name: process.env.RELEASE_TAG,
              });

              core.exportVariable('RELEASE_ID', response.data.id);
              core.exportVariable('RELEASE_UPLOAD_URL', response.data.upload_url);
            } catch (error) {
              core.setFailed(error.message);
            }
      - name: Fetch Release Notes
        id: release-notes
        uses: actions/github-script@v6
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          script: |
            try {
              const response = await github.rest.repos.getRelease({
                owner: context.repo.owner,
                repo: context.repo.repo,
                release_id: process.env.RELEASE_ID,
              });
              const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
              return formattedBody;
            } catch (error) {
              core.setFailed(error.message);
            }
        env:
          RELEASE_ID: ${{ env.RELEASE_ID }}
      - name: Github Releases To Discord
        env:
          WEBHOOK_URL: ${{ secrets.WEBHOOK_URL }}
          REALEASE_TAG: ${{ env.RELEASE_TAG }}
          RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
        run: |
          curl -H "Content-Type: application/json" -X POST -d '{
            "content": "New LiteLLM release '"${RELEASE_TAG}"'",
            "username": "Release Changelog",
            "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
            "embeds": [
              {
                "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
                "description": "'"${RELEASE_NOTES}"'",
                "color": 2105893
              }
            ]
          }' $WEBHOOK_URL
@@ -1,67 +0,0 @@
# this workflow is triggered by an API call when there is a new PyPI release of LiteLLM
name: Build, Publish LiteLLM Helm Chart. New Release
on:
  workflow_dispatch:
    inputs:
      chartVersion:
        description: "Update the helm chart's version to this"

# Defines two custom environment variables for the workflow. Used for the Container registry domain, and a name for the Docker image that this workflow builds.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
  REPO_OWNER: ${{github.repository_owner}}

# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
jobs:
  build-and-push-helm-chart:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: lowercase github.repository_owner
        run: |
          echo "REPO_OWNER=`echo ${{github.repository_owner}} | tr '[:upper:]' '[:lower:]'`" >>${GITHUB_ENV}

      - name: Get LiteLLM Latest Tag
        id: current_app_tag
        uses: WyriHaximus/github-action-get-previous-tag@v1.3.0

      - name: Get last published chart version
        id: current_version
        shell: bash
        run: helm show chart oci://${{ env.REGISTRY }}/${{ env.REPO_OWNER }}/litellm-helm | grep '^version:' | awk 'BEGIN{FS=":"}{print "current-version="$2}' | tr -d " " | tee -a $GITHUB_OUTPUT
        env:
          HELM_EXPERIMENTAL_OCI: '1'

      # Automatically update the helm chart version one "patch" level
      - name: Bump release version
        id: bump_version
        uses: christian-draeger/increment-semantic-version@1.1.0
        with:
          current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
          version-fragment: 'bug'

      - name: Lint helm chart
        run: helm lint deploy/charts/litellm-helm

      - uses: ./.github/actions/helm-oci-chart-releaser
        with:
          name: litellm-helm
          repository: ${{ env.REPO_OWNER }}
          tag: ${{ github.event.inputs.chartVersion || steps.bump_version.outputs.next-version || '0.1.0' }}
          app_version: ${{ steps.current_app_tag.outputs.tag || 'latest' }}
          path: deploy/charts/litellm-helm
          registry: ${{ env.REGISTRY }}
          registry_username: ${{ github.actor }}
          registry_password: ${{ secrets.GITHUB_TOKEN }}
          update_dependencies: true
@@ -1,27 +0,0 @@
name: Helm unit test

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  unit-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Set up Helm 3.11.1
        uses: azure/setup-helm@v1
        with:
          version: '3.11.1'

      - name: Install Helm Unit Test Plugin
        run: |
          helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4

      - name: Run unit tests
        run:
          helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
@@ -1,138 +0,0 @@
import csv
import os
from github import Github


def interpret_results(csv_file):
    with open(csv_file, newline="") as csvfile:
        csvreader = csv.DictReader(csvfile)
        rows = list(csvreader)
        """
        in this csv reader
        - Create 1 new column "Status"
        - if a row has a median response time < 300 and an average response time < 300, Status = "Passed ✅"
        - if a row has a median response time >= 300 or an average response time >= 300, Status = "Failed ❌"
        - Order the table in this order Name, Status, Median Response Time, Average Response Time, Requests/s,Failures/s, Min Response Time, Max Response Time, all other columns
        """

    # Add a new column "Status"
    for row in rows:
        median_response_time = float(
            row["Median Response Time"].strip().rstrip("ms")
        )
        average_response_time = float(
            row["Average Response Time"].strip().rstrip("s")
        )

        request_count = int(row["Request Count"])
        failure_count = int(row["Failure Count"])

        failure_percent = round((failure_count / request_count) * 100, 2)

        # Determine status based on conditions
        if (
            median_response_time < 300
            and average_response_time < 300
            and failure_percent < 5
        ):
            row["Status"] = "Passed ✅"
        else:
            row["Status"] = "Failed ❌"

    # Construct Markdown table header
    markdown_table = "| Name | Status | Median Response Time (ms) | Average Response Time (ms) | Requests/s | Failures/s | Request Count | Failure Count | Min Response Time (ms) | Max Response Time (ms) |"
    markdown_table += (
        "\n| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"
    )

    # Construct Markdown table rows
    for row in rows:
        markdown_table += f"\n| {row['Name']} | {row['Status']} | {row['Median Response Time']} | {row['Average Response Time']} | {row['Requests/s']} | {row['Failures/s']} | {row['Request Count']} | {row['Failure Count']} | {row['Min Response Time']} | {row['Max Response Time']} |"
    print("markdown table: ", markdown_table)
    return markdown_table


def _get_docker_run_command_stable_release(release_version):
    return f"""
\n\n
## Docker Run LiteLLM Proxy

```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:litellm_stable_release_branch-{release_version}
```
"""


def _get_docker_run_command(release_version):
    return f"""
\n\n
## Docker Run LiteLLM Proxy

```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:main-{release_version}
```
"""


def get_docker_run_command(release_version):
    if "stable" in release_version:
        return _get_docker_run_command_stable_release(release_version)
    else:
        return _get_docker_run_command(release_version)


if __name__ == "__main__":
    csv_file = "load_test_stats.csv"  # Change this to the path of your CSV file
    markdown_table = interpret_results(csv_file)

    # Update release body with interpreted results
    github_token = os.getenv("GITHUB_TOKEN")
    g = Github(github_token)
    repo = g.get_repo(
        "BerriAI/litellm"
    )  # Replace with your repository's username and name
    latest_release = repo.get_latest_release()
    print("got latest release: ", latest_release)
    print(latest_release.title)
    print(latest_release.tag_name)

    release_version = latest_release.title

    print("latest release body: ", latest_release.body)
    print("markdown table: ", markdown_table)

    # check if "Load Test LiteLLM Proxy Results" exists
    existing_release_body = latest_release.body
    if "Load Test LiteLLM Proxy Results" in latest_release.body:
        # find the "Load Test LiteLLM Proxy Results" section and delete it
        start_index = latest_release.body.find("Load Test LiteLLM Proxy Results")
        existing_release_body = latest_release.body[:start_index]

    docker_run_command = get_docker_run_command(release_version)
    print("docker run command: ", docker_run_command)

    new_release_body = (
        existing_release_body
        + docker_run_command
        + "\n\n"
        + "### Don't want to maintain your internal proxy? get in touch 🎉"
        + "\nHosted Proxy Alpha: https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat"
        + "\n\n"
        + "## Load Test LiteLLM Proxy Results"
        + "\n\n"
        + markdown_table
    )
    print("new release body: ", new_release_body)
    try:
        latest_release.update_release(
            name=latest_release.tag_name,
            message=new_release_body,
        )
    except Exception as e:
        print(e)
@@ -1,64 +0,0 @@
|
||||
name: Issue Keyword Labeler
|
||||
|
||||
on:
|
||||
issues:
|
||||
types:
|
||||
- opened
|
||||
|
||||
jobs:
|
||||
scan-and-label:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Scan for provider keywords
|
||||
id: scan
|
||||
env:
|
||||
PROVIDER_ISSUE_WEBHOOK_URL: ${{ secrets.PROVIDER_ISSUE_WEBHOOK_URL }}
|
||||
KEYWORDS: azure,openai,bedrock,vertexai,vertex ai,anthropic
|
||||
run: python3 .github/scripts/scan_keywords.py
|
||||
|
||||
- name: Ensure label exists
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const labelName = 'llm translation';
|
||||
try {
|
||||
await github.rest.issues.getLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
name: labelName
|
||||
});
|
||||
} catch (error) {
|
||||
if (error.status === 404) {
|
||||
await github.rest.issues.createLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
name: labelName,
|
||||
color: 'c1ff72',
|
||||
description: 'Issues related to LLM provider translation/mapping'
|
||||
});
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
- name: Add label to the issue
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
labels: ['llm translation']
|
||||
});
|
||||
|
@@ -1,17 +0,0 @@
|
||||
name: Label ML Ops Team Issues
|
||||
|
||||
on:
|
||||
issues:
|
||||
types:
|
||||
- opened
|
||||
|
||||
jobs:
|
||||
add-mlops-label:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check if ML Ops Team is selected
|
||||
uses: actions-ecosystem/action-add-labels@v1
|
||||
if: contains(github.event.issue.body, '### Are you a ML Ops Team?') && contains(github.event.issue.body, 'Yes')
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
labels: "mlops user request"
|
@@ -1,89 +0,0 @@
|
||||
name: LLM Translation Tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_candidate_tag:
|
||||
description: 'Release candidate tag/version'
|
||||
required: true
|
||||
type: string
|
||||
push:
|
||||
tags:
|
||||
- 'v*-rc*' # Triggers on release candidate tags like v1.0.0-rc1
|
||||
|
||||
jobs:
|
||||
run-llm-translation-tests:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 90
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.release_candidate_tag || github.ref }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install Poetry
|
||||
uses: snok/install-poetry@v1
|
||||
with:
|
||||
version: latest
|
||||
virtualenvs-create: true
|
||||
virtualenvs-in-project: true
|
||||
|
||||
- name: Cache Poetry dependencies
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry
|
||||
.venv
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --with dev
|
||||
poetry run pip install pytest-xdist pytest-timeout
|
||||
|
||||
- name: Create test results directory
|
||||
run: mkdir -p test-results
|
||||
|
||||
- name: Run LLM Translation Tests
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||
AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
|
||||
AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
|
||||
# Add other API keys as needed
|
||||
run: |
|
||||
python .github/workflows/run_llm_translation_tests.py \
|
||||
--tag "${{ github.event.inputs.release_candidate_tag || github.ref_name }}" \
|
||||
--commit "${{ github.sha }}" \
|
||||
|| true # Continue even if tests fail
|
||||
|
||||
- name: Display test summary
|
||||
if: always()
|
||||
run: |
|
||||
if [ -f "test-results/llm_translation_report.md" ]; then
|
||||
echo "Test report generated successfully!"
|
||||
echo "Artifact will contain:"
|
||||
echo "- test-results/junit.xml (JUnit XML results)"
|
||||
echo "- test-results/llm_translation_report.md (Beautiful markdown report)"
|
||||
else
|
||||
echo "Warning: Test report was not generated"
|
||||
fi
|
||||
|
||||
- name: Upload test artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
if: always()
|
||||
with:
|
||||
name: LLM-Translation-Artifact-${{ github.event.inputs.release_candidate_tag || github.ref_name }}
|
||||
path: test-results/
|
||||
retention-days: 30
|
@@ -1,59 +0,0 @@
|
||||
name: Test Locust Load Test
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Build, Publish LiteLLM Docker Image. New Release"]
|
||||
types:
|
||||
- completed
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install PyGithub
|
||||
- name: re-deploy proxy
|
||||
run: |
|
||||
echo "Current working directory: $PWD"
|
||||
ls
|
||||
python ".github/workflows/redeploy_proxy.py"
|
||||
env:
|
||||
LOAD_TEST_REDEPLOY_URL1: ${{ secrets.LOAD_TEST_REDEPLOY_URL1 }}
|
||||
LOAD_TEST_REDEPLOY_URL2: ${{ secrets.LOAD_TEST_REDEPLOY_URL2 }}
|
||||
working-directory: ${{ github.workspace }}
|
||||
- name: Run Load Test
|
||||
id: locust_run
|
||||
uses: BerriAI/locust-github-action@master
|
||||
with:
|
||||
LOCUSTFILE: ".github/workflows/locustfile.py"
|
||||
URL: "https://post-release-load-test-proxy.onrender.com/"
|
||||
USERS: "20"
|
||||
RATE: "20"
|
||||
RUNTIME: "300s"
|
||||
- name: Process Load Test Stats
|
||||
run: |
|
||||
echo "Current working directory: $PWD"
|
||||
ls
|
||||
python ".github/workflows/interpret_load_test.py"
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
working-directory: ${{ github.workspace }}
|
||||
- name: Upload CSV as Asset to Latest Release
|
||||
uses: xresloader/upload-to-github-release@v1
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
file: "load_test_stats.csv;load_test.html"
|
||||
update_latest_release: true
|
||||
tag_name: "load-test"
|
||||
overwrite: true
|
@@ -1,28 +0,0 @@
|
||||
from locust import HttpUser, task, between
|
||||
|
||||
|
||||
class MyUser(HttpUser):
|
||||
wait_time = between(1, 5)
|
||||
|
||||
@task
|
||||
def chat_completion(self):
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": "Bearer sk-8N1tLOOyH8TIxwOLahhIVg",
|
||||
# Include any additional headers you may need for authentication, etc.
|
||||
}
|
||||
|
||||
# Customize the payload with "model" and "messages" keys
|
||||
payload = {
|
||||
"model": "fake-openai-endpoint",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a chat bot."},
|
||||
{"role": "user", "content": "Hello, how are you?"},
|
||||
],
|
||||
# Add more data as necessary
|
||||
}
|
||||
|
||||
# Make a POST request to the "chat/completions" endpoint
|
||||
response = self.client.post("chat/completions", json=payload, headers=headers)
|
||||
|
||||
# Print or log the response if needed
|
34
Development/litellm/.github/workflows/main.yml
vendored
@@ -1,34 +0,0 @@
|
||||
name: Publish Dev Release to PyPI
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
publish-dev-release:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8 # Adjust the Python version as needed
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install toml twine
|
||||
|
||||
- name: Read version from pyproject.toml
|
||||
id: read-version
|
||||
run: |
|
||||
version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
|
||||
printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
|
||||
|
||||
- name: Check if version exists on PyPI
|
||||
id: check-version
|
||||
run: |
|
||||
set -e
|
||||
# Query the PyPI JSON API for this version (twine check only validates local dist files)
if curl -sf "https://pypi.org/pypi/litellm/$LITELLM_VERSION/json" >/dev/null 2>&1; then
|
||||
echo "Version $LITELLM_VERSION already exists on PyPI. Skipping publish."
|
||||
|
@@ -1,206 +0,0 @@
|
||||
name: Publish Prisma Migrations
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'schema.prisma' # Check root schema.prisma
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
publish-migrations:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:14
|
||||
env:
|
||||
POSTGRES_DB: temp_db
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
ports:
|
||||
- 5432:5432
|
||||
options: >-
|
||||
--health-cmd pg_isready
|
||||
--health-interval 10s
|
||||
--health-timeout 5s
|
||||
--health-retries 5
|
||||
|
||||
# Add shadow database service
|
||||
postgres_shadow:
|
||||
image: postgres:14
|
||||
env:
|
||||
POSTGRES_DB: shadow_db
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
ports:
|
||||
- 5433:5432
|
||||
options: >-
|
||||
--health-cmd pg_isready
|
||||
--health-interval 10s
|
||||
--health-timeout 5s
|
||||
--health-retries 5
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
pip install prisma
|
||||
pip install python-dotenv
|
||||
|
||||
- name: Generate Initial Migration if None Exists
|
||||
env:
|
||||
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||
run: |
|
||||
mkdir -p deploy/migrations
|
||||
echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
|
||||
|
||||
if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
|
||||
echo "No existing migrations found, creating baseline..."
|
||||
VERSION=$(date +%Y%m%d%H%M%S)
|
||||
mkdir -p deploy/migrations/${VERSION}_initial
|
||||
|
||||
echo "Generating initial migration..."
|
||||
# Save raw output for debugging
|
||||
prisma migrate diff \
|
||||
--from-empty \
|
||||
--to-schema-datamodel schema.prisma \
|
||||
--shadow-database-url "${SHADOW_DATABASE_URL}" \
|
||||
--script > deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||
|
||||
echo "Raw migration file content:"
|
||||
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||
|
||||
echo "Cleaning migration file..."
|
||||
# Clean the file
|
||||
sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
|
||||
|
||||
# Verify the migration file
|
||||
if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
|
||||
echo "ERROR: Migration file is empty after cleaning"
|
||||
echo "Original content was:"
|
||||
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Final migration file content:"
|
||||
cat deploy/migrations/${VERSION}_initial/migration.sql
|
||||
|
||||
# Verify it starts with SQL
|
||||
if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
|
||||
echo "ERROR: Migration file does not start with SQL command or comment"
|
||||
echo "First line is:"
|
||||
head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
|
||||
echo "Full content is:"
|
||||
cat deploy/migrations/${VERSION}_initial/migration.sql
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
|
||||
fi
|
||||
|
||||
- name: Compare and Generate Migration
|
||||
if: success()
|
||||
env:
|
||||
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||
run: |
|
||||
# Create temporary migration workspace
|
||||
mkdir -p temp_migrations
|
||||
|
||||
# Copy existing migrations (will not fail if directory is empty)
|
||||
cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
|
||||
|
||||
VERSION=$(date +%Y%m%d%H%M%S)
# Expose VERSION to later steps (the PR branch name references env.VERSION)
echo "VERSION=${VERSION}" >> $GITHUB_ENV
|
||||
|
||||
# Generate diff against existing migrations or empty state
|
||||
prisma migrate diff \
|
||||
--from-migrations temp_migrations \
|
||||
--to-schema-datamodel schema.prisma \
|
||||
--shadow-database-url "${SHADOW_DATABASE_URL}" \
|
||||
--script > temp_migrations/migration_${VERSION}.sql
|
||||
|
||||
# Check if there are actual changes
|
||||
if [ -s temp_migrations/migration_${VERSION}.sql ]; then
|
||||
echo "Changes detected, creating new migration"
|
||||
mkdir -p deploy/migrations/${VERSION}_schema_update
|
||||
mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
|
||||
echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
|
||||
else
|
||||
echo "No schema changes detected"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
- name: Verify Migration
|
||||
if: success()
|
||||
env:
|
||||
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||
run: |
|
||||
# Create test database
|
||||
psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'
|
||||
|
||||
# Apply all migrations in order to verify
|
||||
for migration in deploy/migrations/*/migration.sql; do
|
||||
echo "Applying migration: $migration"
|
||||
psql "${SHADOW_DATABASE_URL}" -f $migration
|
||||
done
|
||||
|
||||
# Add this step before create-pull-request to debug permissions
|
||||
- name: Check Token Permissions
|
||||
run: |
|
||||
echo "Checking token permissions..."
|
||||
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
https://api.github.com/repos/BerriAI/litellm/collaborators
|
||||
|
||||
echo "\nChecking if token can create PRs..."
|
||||
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
https://api.github.com/repos/BerriAI/litellm
|
||||
|
||||
# Add this debug step before git push
|
||||
- name: Debug Changed Files
|
||||
run: |
|
||||
echo "Files staged for commit:"
|
||||
git diff --name-status --staged
|
||||
|
||||
echo "\nAll changed files:"
|
||||
git status
|
||||
|
||||
- name: Create Pull Request
|
||||
if: success()
|
||||
uses: peter-evans/create-pull-request@v5
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
commit-message: "chore: update prisma migrations"
|
||||
title: "Update Prisma Migrations"
|
||||
body: |
|
||||
Auto-generated migration based on schema.prisma changes.
|
||||
|
||||
Generated files:
|
||||
- deploy/migrations/${{ env.VERSION }}_schema_update/migration.sql
|
||||
- deploy/migrations/${{ env.VERSION }}_schema_update/README.md
|
||||
branch: feat/prisma-migration-${{ env.VERSION }}
|
||||
base: main
|
||||
delete-branch: true
|
||||
|
||||
- name: Generate and Save Migrations
|
||||
run: |
|
||||
# Only add migration files
|
||||
git add deploy/migrations/
|
||||
git status # Debug what's being committed
|
||||
git commit -m "chore: update prisma migrations"
|
@@ -1,31 +0,0 @@
|
||||
name: Read Version from pyproject.toml
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main # Change this to the default branch of your repository
|
||||
|
||||
jobs:
|
||||
read-version:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8 # Adjust the Python version as needed
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install toml
|
||||
|
||||
- name: Read version from pyproject.toml
|
||||
id: read-version
|
||||
run: |
|
||||
version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
|
||||
printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
|
||||
|
||||
- name: Display version
|
||||
run: echo "Current version is $LITELLM_VERSION"
|
@@ -1,20 +0,0 @@
|
||||
"""
|
||||
|
||||
redeploy_proxy.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
|
||||
# send a get request to this endpoint
|
||||
deploy_hook1 = os.getenv("LOAD_TEST_REDEPLOY_URL1")
|
||||
response = requests.get(deploy_hook1, timeout=20)
|
||||
|
||||
|
||||
deploy_hook2 = os.getenv("LOAD_TEST_REDEPLOY_URL2")
|
||||
response = requests.get(deploy_hook2, timeout=20)
|
||||
|
||||
print("SENT GET REQUESTS to re-deploy proxy")
|
||||
print("sleeeping.... for 60s")
|
||||
time.sleep(60)
|
@@ -1,39 +0,0 @@
|
||||
name: Reset litellm_stable branch
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published, created]
|
||||
jobs:
|
||||
update-stable-branch:
|
||||
if: ${{ startsWith(github.event.release.tag_name, 'v') && !endsWith(github.event.release.tag_name, '-stable') }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Reset litellm_stable_release_branch branch to the release commit
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
# Configure Git user
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
# Fetch all branches and tags
|
||||
git fetch --all
|
||||
|
||||
# Check if the litellm_stable_release_branch branch exists
|
||||
if git show-ref --verify --quiet refs/remotes/origin/litellm_stable_release_branch; then
|
||||
echo "litellm_stable_release_branch branch exists."
|
||||
git checkout litellm_stable_release_branch
|
||||
else
|
||||
echo "litellm_stable_release_branch branch does not exist. Creating it."
|
||||
git checkout -b litellm_stable_release_branch
|
||||
fi
|
||||
|
||||
# Reset litellm_stable_release_branch branch to the release commit
|
||||
git reset --hard $GITHUB_SHA
|
||||
|
||||
# Push the updated litellm_stable_release_branch branch
|
||||
git push origin litellm_stable_release_branch --force
|
@@ -1,27 +0,0 @@
|
||||
Date,"Ben
|
||||
Ashley",Tom Brooks,Jimmy Cooney,"Sue
|
||||
Daniels",Berlinda Fong,Terry Jones,Angelina Little,Linda Smith
|
||||
10/1,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,FALSE,TRUE
|
||||
10/2,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/3,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/4,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/5,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/6,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/7,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/8,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/9,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/10,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/11,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/12,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/13,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/14,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/15,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/16,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/17,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/18,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/19,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/20,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/21,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/22,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
10/23,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
|
||||
Total,0,1,1,1,1,1,0,1
|
|
@@ -1,439 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Run LLM Translation Tests and Generate Beautiful Markdown Report
|
||||
|
||||
This script runs the LLM translation tests and generates a comprehensive
|
||||
markdown report with provider-specific breakdowns and test statistics.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import xml.etree.ElementTree as ET
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import json
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
|
||||
# ANSI color codes for terminal output
|
||||
class Colors:
|
||||
GREEN = '\033[92m'
|
||||
RED = '\033[91m'
|
||||
YELLOW = '\033[93m'
|
||||
BLUE = '\033[94m'
|
||||
PURPLE = '\033[95m'
|
||||
CYAN = '\033[96m'
|
||||
RESET = '\033[0m'
|
||||
BOLD = '\033[1m'
|
||||
|
||||
def print_colored(message: str, color: str = Colors.RESET):
|
||||
"""Print colored message to terminal"""
|
||||
print(f"{color}{message}{Colors.RESET}")
|
||||
|
||||
def get_provider_from_test_file(test_file: str) -> str:
|
||||
"""Map test file names to provider names"""
|
||||
provider_mapping = {
|
||||
'test_anthropic': 'Anthropic',
|
||||
'test_azure': 'Azure',
|
||||
'test_bedrock': 'AWS Bedrock',
|
||||
'test_openai': 'OpenAI',
|
||||
'test_vertex': 'Google Vertex AI',
|
||||
'test_gemini': 'Google Vertex AI',
|
||||
'test_cohere': 'Cohere',
|
||||
'test_databricks': 'Databricks',
|
||||
'test_groq': 'Groq',
|
||||
'test_together': 'Together AI',
|
||||
'test_mistral': 'Mistral',
|
||||
'test_deepseek': 'DeepSeek',
|
||||
'test_replicate': 'Replicate',
|
||||
'test_huggingface': 'HuggingFace',
|
||||
'test_fireworks': 'Fireworks AI',
|
||||
'test_perplexity': 'Perplexity',
|
||||
'test_cloudflare': 'Cloudflare',
|
||||
'test_voyage': 'Voyage AI',
|
||||
'test_xai': 'xAI',
|
||||
'test_nvidia': 'NVIDIA',
|
||||
'test_watsonx': 'IBM watsonx',
|
||||
'test_azure_ai': 'Azure AI',
|
||||
'test_snowflake': 'Snowflake',
|
||||
'test_infinity': 'Infinity',
|
||||
'test_jina': 'Jina AI',
|
||||
'test_deepgram': 'Deepgram',
|
||||
'test_clarifai': 'Clarifai',
|
||||
'test_triton': 'Triton',
|
||||
}
|
||||
|
||||
for key, provider in provider_mapping.items():
|
||||
if key in test_file:
|
||||
return provider
|
||||
|
||||
# For cross-provider test files
|
||||
if any(name in test_file for name in ['test_optional_params', 'test_prompt_factory',
|
||||
'test_router', 'test_text_completion']):
|
||||
return f'Cross-Provider Tests ({test_file})'
|
||||
|
||||
return 'Other Tests'
|
||||
|
||||
def format_duration(seconds: float) -> str:
|
||||
"""Format duration in human-readable format"""
|
||||
if seconds < 60:
|
||||
return f"{seconds:.2f}s"
|
||||
elif seconds < 3600:
|
||||
minutes = int(seconds // 60)
|
||||
secs = seconds % 60
|
||||
return f"{minutes}m {secs:.0f}s"
|
||||
else:
|
||||
hours = int(seconds // 3600)
|
||||
minutes = int((seconds % 3600) // 60)
|
||||
return f"{hours}h {minutes}m"
|
||||
|
||||
|
||||
def generate_markdown_report(junit_xml_path: str, output_path: str, tag: str = None, commit: str = None):
|
||||
"""Generate a beautiful markdown report from JUnit XML"""
|
||||
try:
|
||||
tree = ET.parse(junit_xml_path)
|
||||
root = tree.getroot()
|
||||
|
||||
# Handle both testsuite and testsuites root
|
||||
if root.tag == 'testsuites':
|
||||
suites = root.findall('testsuite')
|
||||
else:
|
||||
suites = [root]
|
||||
|
||||
# Overall statistics
|
||||
total_tests = 0
|
||||
total_failures = 0
|
||||
total_errors = 0
|
||||
total_skipped = 0
|
||||
total_time = 0.0
|
||||
|
||||
# Provider breakdown
|
||||
provider_stats = defaultdict(lambda: {'passed': 0, 'failed': 0, 'skipped': 0, 'errors': 0, 'time': 0.0})
|
||||
provider_tests = defaultdict(list)
|
||||
|
||||
for suite in suites:
|
||||
total_tests += int(suite.get('tests', 0))
|
||||
total_failures += int(suite.get('failures', 0))
|
||||
total_errors += int(suite.get('errors', 0))
|
||||
total_skipped += int(suite.get('skipped', 0))
|
||||
total_time += float(suite.get('time', 0))
|
||||
|
||||
for testcase in suite.findall('testcase'):
|
||||
classname = testcase.get('classname', '')
|
||||
test_name = testcase.get('name', '')
|
||||
test_time = float(testcase.get('time', 0))
|
||||
|
||||
# Extract test file name from classname
|
||||
if '.' in classname:
|
||||
parts = classname.split('.')
|
||||
test_file = parts[-2] if len(parts) > 1 else 'unknown'
|
||||
else:
|
||||
test_file = 'unknown'
|
||||
|
||||
provider = get_provider_from_test_file(test_file)
|
||||
provider_stats[provider]['time'] += test_time
|
||||
|
||||
# Check test status
|
||||
if testcase.find('failure') is not None:
|
||||
provider_stats[provider]['failed'] += 1
|
||||
failure = testcase.find('failure')
|
||||
failure_msg = failure.get('message', '') if failure is not None else ''
|
||||
provider_tests[provider].append({
|
||||
'name': test_name,
|
||||
'status': 'FAILED',
|
||||
'time': test_time,
|
||||
'message': failure_msg
|
||||
})
|
||||
elif testcase.find('error') is not None:
|
||||
provider_stats[provider]['errors'] += 1
|
||||
error = testcase.find('error')
|
||||
error_msg = error.get('message', '') if error is not None else ''
|
||||
provider_tests[provider].append({
|
||||
'name': test_name,
|
||||
'status': 'ERROR',
|
||||
'time': test_time,
|
||||
'message': error_msg
|
||||
})
|
||||
elif testcase.find('skipped') is not None:
|
||||
provider_stats[provider]['skipped'] += 1
|
||||
skip = testcase.find('skipped')
|
||||
skip_msg = skip.get('message', '') if skip is not None else ''
|
||||
provider_tests[provider].append({
|
||||
'name': test_name,
|
||||
'status': 'SKIPPED',
|
||||
'time': test_time,
|
||||
'message': skip_msg
|
||||
})
|
||||
else:
|
||||
provider_stats[provider]['passed'] += 1
|
||||
provider_tests[provider].append({
|
||||
'name': test_name,
|
||||
'status': 'PASSED',
|
||||
'time': test_time,
|
||||
'message': ''
|
||||
})
|
||||
|
||||
passed = total_tests - total_failures - total_errors - total_skipped
|
||||
|
||||
# Generate the markdown report
|
||||
with open(output_path, 'w') as f:
|
||||
# Header
|
||||
f.write("# LLM Translation Test Results\n\n")
|
||||
|
||||
# Metadata table
|
||||
f.write("## Test Run Information\n\n")
|
||||
f.write("| Field | Value |\n")
|
||||
f.write("|-------|-------|\n")
|
||||
f.write(f"| **Tag** | `{tag or 'N/A'}` |\n")
|
||||
f.write(f"| **Date** | {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')} |\n")
|
||||
f.write(f"| **Commit** | `{commit or 'N/A'}` |\n")
|
||||
f.write(f"| **Duration** | {format_duration(total_time)} |\n")
|
||||
f.write("\n")
|
||||
|
||||
# Overall statistics with visual elements
|
||||
f.write("## Overall Statistics\n\n")
|
||||
|
||||
# Summary box
|
||||
f.write("```\n")
|
||||
f.write(f"Total Tests: {total_tests}\n")
|
||||
f.write(f"├── Passed: {passed:>4} ({(passed/total_tests)*100 if total_tests > 0 else 0:.1f}%)\n")
|
||||
f.write(f"├── Failed: {total_failures:>4} ({(total_failures/total_tests)*100 if total_tests > 0 else 0:.1f}%)\n")
|
||||
f.write(f"├── Errors: {total_errors:>4} ({(total_errors/total_tests)*100 if total_tests > 0 else 0:.1f}%)\n")
|
||||
f.write(f"└── Skipped: {total_skipped:>4} ({(total_skipped/total_tests)*100 if total_tests > 0 else 0:.1f}%)\n")
|
||||
f.write("```\n\n")
|
||||
|
||||
|
||||
# Provider summary table
|
||||
f.write("## Results by Provider\n\n")
|
||||
f.write("| Provider | Total | Pass | Fail | Error | Skip | Pass Rate | Duration |\n")
|
||||
f.write("|----------|-------|------|------|-------|------|-----------|----------|")
|
||||
|
||||
# Sort providers: specific providers first, then cross-provider tests
|
||||
sorted_providers = []
|
||||
cross_provider = []
|
||||
for p in sorted(provider_stats.keys()):
|
||||
if 'Cross-Provider' in p or p == 'Other Tests':
|
||||
cross_provider.append(p)
|
||||
else:
|
||||
sorted_providers.append(p)
|
||||
|
||||
all_providers = sorted_providers + cross_provider
|
||||
|
||||
for provider in all_providers:
|
||||
stats = provider_stats[provider]
|
||||
total = stats['passed'] + stats['failed'] + stats['errors'] + stats['skipped']
|
||||
pass_rate = (stats['passed'] / total * 100) if total > 0 else 0
|
||||
|
||||
f.write(f"\n| {provider} | {total} | {stats['passed']} | {stats['failed']} | ")
|
||||
f.write(f"{stats['errors']} | {stats['skipped']} | {pass_rate:.1f}% | ")
|
||||
f.write(f"{format_duration(stats['time'])} |")
|
||||
|
||||
# Detailed test results by provider
|
||||
f.write("\n\n## Detailed Test Results\n\n")
|
||||
|
||||
for provider in sorted_providers:
|
||||
if provider_tests[provider]:
|
||||
stats = provider_stats[provider]
|
||||
total = stats['passed'] + stats['failed'] + stats['errors'] + stats['skipped']
|
||||
|
||||
f.write(f"### {provider}\n\n")
|
||||
f.write(f"**Summary:** {stats['passed']}/{total} passed ")
|
||||
f.write(f"({(stats['passed']/total)*100 if total > 0 else 0:.1f}%) ")
|
||||
f.write(f"in {format_duration(stats['time'])}\n\n")
|
||||
|
||||
# Group tests by status
|
||||
tests_by_status = defaultdict(list)
|
||||
for test in provider_tests[provider]:
|
||||
tests_by_status[test['status']].append(test)
|
||||
|
||||
# Show failed tests first (if any)
|
||||
if tests_by_status['FAILED']:
|
||||
f.write("<details>\n<summary>Failed Tests</summary>\n\n")
|
||||
for test in tests_by_status['FAILED']:
|
||||
f.write(f"- `{test['name']}` ({test['time']:.2f}s)\n")
|
||||
if test['message']:
|
||||
# Truncate long error messages
|
||||
msg = test['message'][:200] + '...' if len(test['message']) > 200 else test['message']
|
||||
f.write(f" > {msg}\n")
|
||||
f.write("\n</details>\n\n")
|
||||
|
||||
# Show errors (if any)
|
||||
if tests_by_status['ERROR']:
|
||||
f.write("<details>\n<summary>Error Tests</summary>\n\n")
|
||||
for test in tests_by_status['ERROR']:
|
||||
f.write(f"- `{test['name']}` ({test['time']:.2f}s)\n")
|
||||
f.write("\n</details>\n\n")
|
||||
|
||||
# Show passed tests in collapsible section
|
||||
if tests_by_status['PASSED']:
|
||||
f.write("<details>\n<summary>Passed Tests</summary>\n\n")
|
||||
for test in tests_by_status['PASSED']:
|
||||
f.write(f"- `{test['name']}` ({test['time']:.2f}s)\n")
|
||||
f.write("\n</details>\n\n")
|
||||
|
||||
# Show skipped tests (if any)
|
||||
if tests_by_status['SKIPPED']:
|
||||
f.write("<details>\n<summary>Skipped Tests</summary>\n\n")
|
||||
for test in tests_by_status['SKIPPED']:
|
||||
f.write(f"- `{test['name']}`\n")
|
||||
f.write("\n</details>\n\n")
|
||||
|
||||
# Cross-provider tests in a separate section
|
||||
if cross_provider:
|
||||
f.write("### Cross-Provider Tests\n\n")
|
||||
for provider in cross_provider:
|
||||
if provider_tests[provider]:
|
||||
stats = provider_stats[provider]
|
||||
total = stats['passed'] + stats['failed'] + stats['errors'] + stats['skipped']
|
||||
|
||||
f.write(f"#### {provider}\n\n")
|
||||
f.write(f"**Summary:** {stats['passed']}/{total} passed ")
|
||||
f.write(f"({(stats['passed']/total)*100 if total > 0 else 0:.1f}%)\n\n")
|
||||
|
||||
# For cross-provider tests, just show counts
|
||||
f.write(f"- Passed: {stats['passed']}\n")
|
||||
if stats['failed'] > 0:
|
||||
f.write(f"- Failed: {stats['failed']}\n")
|
||||
if stats['errors'] > 0:
|
||||
f.write(f"- Errors: {stats['errors']}\n")
|
||||
if stats['skipped'] > 0:
|
||||
f.write(f"- Skipped: {stats['skipped']}\n")
|
||||
f.write("\n")
|
||||
|
||||
|
||||
print_colored(f"Report generated: {output_path}", Colors.GREEN)
|
||||
|
||||
except Exception as e:
|
||||
print_colored(f"Error generating report: {e}", Colors.RED)
|
||||
raise
|
||||
|
||||
def run_tests(test_path: str = "tests/llm_translation/",
|
||||
junit_xml: str = "test-results/junit.xml",
|
||||
report_path: str = "test-results/llm_translation_report.md",
|
||||
tag: str = None,
|
||||
commit: str = None) -> int:
|
||||
"""Run the LLM translation tests and generate report"""
|
||||
|
||||
# Create test results directory
|
||||
os.makedirs(os.path.dirname(junit_xml), exist_ok=True)
|
||||
|
||||
print_colored("Starting LLM Translation Tests", Colors.BOLD + Colors.BLUE)
|
||||
print_colored(f"Test directory: {test_path}", Colors.CYAN)
|
||||
print_colored(f"Output: {junit_xml}", Colors.CYAN)
|
||||
print()
|
||||
|
||||
# Run pytest
|
||||
cmd = [
|
||||
"poetry", "run", "pytest", test_path,
|
||||
f"--junitxml={junit_xml}",
|
||||
"-v",
|
||||
"--tb=short",
|
||||
"--maxfail=500",
|
||||
"-n", "auto"
|
||||
]
|
||||
|
||||
# Add timeout if pytest-timeout is installed
|
||||
try:
|
||||
subprocess.run(["poetry", "run", "python", "-c", "import pytest_timeout"],
|
||||
capture_output=True, check=True)
|
||||
cmd.extend(["--timeout=300"])
|
||||
except Exception:
|
||||
print_colored("Warning: pytest-timeout not installed, skipping timeout option", Colors.YELLOW)
|
||||
|
||||
print_colored("Running pytest with command:", Colors.YELLOW)
|
||||
print(f" {' '.join(cmd)}")
|
||||
print()
|
||||
|
||||
# Run the tests
|
||||
result = subprocess.run(cmd, capture_output=False)
|
||||
|
||||
# Generate the report regardless of test outcome
|
||||
if os.path.exists(junit_xml):
|
||||
print()
|
||||
print_colored("Generating test report...", Colors.BLUE)
|
||||
generate_markdown_report(junit_xml, report_path, tag, commit)
|
||||
|
||||
# Print summary to console
|
||||
print()
|
||||
print_colored("Test Summary:", Colors.BOLD + Colors.PURPLE)
|
||||
|
||||
# Parse XML for quick summary
|
||||
tree = ET.parse(junit_xml)
|
||||
root = tree.getroot()
|
||||
|
||||
if root.tag == 'testsuites':
|
||||
suites = root.findall('testsuite')
|
||||
else:
|
||||
suites = [root]
|
||||
|
||||
total = sum(int(s.get('tests', 0)) for s in suites)
|
||||
failures = sum(int(s.get('failures', 0)) for s in suites)
|
||||
errors = sum(int(s.get('errors', 0)) for s in suites)
|
||||
skipped = sum(int(s.get('skipped', 0)) for s in suites)
|
||||
passed = total - failures - errors - skipped
|
||||
|
||||
print(f" Total: {total}")
|
||||
print_colored(f" Passed: {passed}", Colors.GREEN)
|
||||
if failures > 0:
|
||||
print_colored(f" Failed: {failures}", Colors.RED)
|
||||
if errors > 0:
|
||||
print_colored(f" Errors: {errors}", Colors.RED)
|
||||
if skipped > 0:
|
||||
print_colored(f" Skipped: {skipped}", Colors.YELLOW)
|
||||
|
||||
if total > 0:
|
||||
pass_rate = (passed / total) * 100
|
||||
color = Colors.GREEN if pass_rate >= 80 else Colors.YELLOW if pass_rate >= 60 else Colors.RED
|
||||
print_colored(f" Pass Rate: {pass_rate:.1f}%", color)
|
||||
else:
|
||||
print_colored("No test results found!", Colors.RED)
|
||||
|
||||
print()
|
||||
print_colored("Test run complete!", Colors.BOLD + Colors.GREEN)
|
||||
|
||||
return result.returncode
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Run LLM Translation Tests")
|
||||
parser.add_argument("--test-path", default="tests/llm_translation/",
|
||||
help="Path to test directory")
|
||||
parser.add_argument("--junit-xml", default="test-results/junit.xml",
|
||||
help="Path for JUnit XML output")
|
||||
parser.add_argument("--report", default="test-results/llm_translation_report.md",
|
||||
help="Path for markdown report")
|
||||
parser.add_argument("--tag", help="Git tag or version")
|
||||
parser.add_argument("--commit", help="Git commit SHA")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Get git info if not provided
|
||||
if not args.commit:
|
||||
try:
|
||||
result = subprocess.run(["git", "rev-parse", "HEAD"],
|
||||
capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
args.commit = result.stdout.strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not args.tag:
|
||||
try:
|
||||
result = subprocess.run(["git", "describe", "--tags", "--abbrev=0"],
|
||||
capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
args.tag = result.stdout.strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
exit_code = run_tests(
|
||||
test_path=args.test_path,
|
||||
junit_xml=args.junit_xml,
|
||||
report_path=args.report,
|
||||
tag=args.tag,
|
||||
commit=args.commit
|
||||
)
|
||||
|
||||
sys.exit(exit_code)
|
@@ -1,67 +0,0 @@
|
||||
name: Simple PyPI Publish
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version to publish (e.g., 1.74.10)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'BerriAI/litellm'
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.8'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install toml build wheel twine
|
||||
|
||||
- name: Update version in pyproject.toml
|
||||
run: |
|
||||
python -c "
|
||||
import toml
|
||||
|
||||
with open('pyproject.toml', 'r') as f:
|
||||
data = toml.load(f)
|
||||
|
||||
data['tool']['poetry']['version'] = '${{ github.event.inputs.version }}'
|
||||
|
||||
with open('pyproject.toml', 'w') as f:
|
||||
toml.dump(data, f)
|
||||
|
||||
print(f'Updated version to ${{ github.event.inputs.version }}')
|
||||
"
|
||||
|
||||
- name: Copy model prices file
|
||||
run: |
|
||||
cp model_prices_and_context_window.json litellm/model_prices_and_context_window_backup.json
|
||||
|
||||
- name: Build package
|
||||
run: |
|
||||
rm -rf build dist
|
||||
python -m build
|
||||
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PUBLISH_PASSWORD }}
|
||||
run: |
|
||||
twine upload dist/*
|
||||
|
||||
- name: Output success
|
||||
run: |
|
||||
echo "✅ Successfully published litellm v${{ github.event.inputs.version }} to PyPI"
|
||||
echo "📦 Package: https://pypi.org/project/litellm/${{ github.event.inputs.version }}/"
|
20
Development/litellm/.github/workflows/stale.yml
vendored
@@ -1,20 +0,0 @@
|
||||
name: "Stale Issue Management"
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0 * * *' # Runs daily at midnight UTC
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/stale@v8
|
||||
with:
|
||||
repo-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
stale-issue-message: "This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
|
||||
stale-pr-message: "This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
|
||||
days-before-stale: 90 # Revert to 60 days
|
||||
days-before-close: 7 # Revert to 7 days
|
||||
stale-issue-label: "stale"
|
||||
operations-per-run: 1000
|
@@ -1,57 +0,0 @@
|
||||
name: LiteLLM Linting
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install Poetry
|
||||
uses: snok/install-poetry@v1
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install openai==1.99.5
|
||||
poetry install --with dev
|
||||
pip install openai==1.99.5
|
||||
|
||||
|
||||
|
||||
- name: Run Black formatting
|
||||
run: |
|
||||
cd litellm
|
||||
poetry run black .
|
||||
cd ..
|
||||
|
||||
- name: Run Ruff linting
|
||||
run: |
|
||||
cd litellm
|
||||
poetry run ruff check .
|
||||
cd ..
|
||||
|
||||
- name: Run MyPy type checking
|
||||
run: |
|
||||
cd litellm
|
||||
poetry run mypy . --ignore-missing-imports
|
||||
cd ..
|
||||
|
||||
- name: Check for circular imports
|
||||
run: |
|
||||
cd litellm
|
||||
poetry run python ../tests/documentation_tests/test_circular_imports.py
|
||||
cd ..
|
||||
|
||||
- name: Check import safety
|
||||
run: |
|
||||
poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
@@ -1,42 +0,0 @@
|
||||
name: LiteLLM Mock Tests (folder - tests/test_litellm)
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Thank You Message
|
||||
run: |
|
||||
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install Poetry
|
||||
uses: snok/install-poetry@v1
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
|
||||
poetry run pip install "pytest-retry==1.6.3"
|
||||
poetry run pip install pytest-xdist
|
||||
poetry run pip install "google-genai==1.22.0"
|
||||
poetry run pip install "fastapi-offline==1.7.3"
|
||||
- name: Setup litellm-enterprise as local package
|
||||
run: |
|
||||
cd enterprise
|
||||
python -m pip install -e .
|
||||
cd ..
|
||||
- name: Run tests
|
||||
run: |
|
||||
poetry run pytest tests/test_litellm -x -vv -n 4
|
@@ -1,54 +0,0 @@
|
||||
import os
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
# GitHub API endpoints
|
||||
GITHUB_API_URL = "https://api.github.com"
|
||||
REPO_OWNER = "BerriAI"
|
||||
REPO_NAME = "litellm"
|
||||
|
||||
# GitHub personal access token (required for uploading release assets)
|
||||
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN")
|
||||
|
||||
# Headers for GitHub API requests
|
||||
headers = {
|
||||
"Accept": "application/vnd.github+json",
|
||||
"Authorization": f"Bearer {GITHUB_ACCESS_TOKEN}",
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
}
|
||||
|
||||
# Get the latest release
|
||||
releases_url = f"{GITHUB_API_URL}/repos/{REPO_OWNER}/{REPO_NAME}/releases/latest"
|
||||
response = requests.get(releases_url, headers=headers)
|
||||
latest_release = response.json()
|
||||
print("Latest release:", latest_release)
|
||||
|
||||
# Upload an asset to the latest release
|
||||
upload_url = latest_release["upload_url"].split("{?")[0]
|
||||
asset_name = "results_stats.csv"
|
||||
asset_path = os.path.join(os.getcwd(), asset_name)
|
||||
print("upload_url:", upload_url)
|
||||
|
||||
with open(asset_path, "rb") as asset_file:
|
||||
asset_data = asset_file.read()
|
||||
|
||||
upload_payload = {
|
||||
"name": asset_name,
|
||||
"label": "Load test results",
|
||||
"created_at": datetime.utcnow().isoformat() + "Z",
|
||||
}
|
||||
|
||||
upload_headers = headers.copy()
|
||||
upload_headers["Content-Type"] = "application/octet-stream"
|
||||
|
||||
upload_response = requests.post(
|
||||
upload_url,
|
||||
headers=upload_headers,
|
||||
data=asset_data,
|
||||
params=upload_payload,
|
||||
)
|
||||
|
||||
if upload_response.status_code == 201:
|
||||
print(f"Asset '{asset_name}' uploaded successfully to the latest release.")
|
||||
else:
|
||||
print(f"Failed to upload asset. Response: {upload_response.text}")
|
@@ -1,40 +0,0 @@
|
||||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: pyright
|
||||
name: pyright
|
||||
entry: pyright
|
||||
language: system
|
||||
types: [python]
|
||||
files: ^(litellm/|litellm_proxy_extras/|enterprise/)
|
||||
- id: isort
|
||||
name: isort
|
||||
entry: isort
|
||||
language: system
|
||||
types: [python]
|
||||
files: (litellm/|litellm_proxy_extras/|enterprise/).*\.py
|
||||
exclude: ^litellm/__init__.py$
|
||||
# - id: black
|
||||
# name: black
|
||||
# entry: poetry run black
|
||||
# language: system
|
||||
# types: [python]
|
||||
# files: (litellm/|litellm_proxy_extras/|enterprise/).*\.py
|
||||
- repo: https://github.com/pycqa/flake8
|
||||
rev: 7.0.0 # The version of flake8 to use
|
||||
hooks:
|
||||
- id: flake8
|
||||
exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/test_litellm/|^tests/test_litellm/|^tests/enterprise/
|
||||
additional_dependencies: [flake8-print]
|
||||
files: (litellm/|litellm_proxy_extras/|enterprise/).*\.py
|
||||
- repo: https://github.com/python-poetry/poetry
|
||||
rev: 1.8.0
|
||||
hooks:
|
||||
- id: poetry-check
|
||||
files: ^(pyproject.toml|litellm-proxy-extras/pyproject.toml)$
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: check-files-match
|
||||
name: Check if files match
|
||||
entry: python3 ci_cd/check_files_match.py
|
||||
language: system
|
@@ -1,144 +0,0 @@
|
||||
# INSTRUCTIONS FOR LITELLM
|
||||
|
||||
This document provides comprehensive instructions for AI agents working in the LiteLLM repository.
|
||||
|
||||
## OVERVIEW
|
||||
|
||||
LiteLLM is a unified interface for 100+ LLMs that:
|
||||
- Translates inputs to provider-specific completion, embedding, and image generation endpoints
|
||||
- Provides consistent OpenAI-format output across all providers (see the sketch after this list)
|
||||
- Includes retry/fallback logic across multiple deployments (Router)
|
||||
- Offers a proxy server (LLM Gateway) with budgets, rate limits, and authentication
|
||||
- Supports advanced features like function calling, streaming, caching, and observability
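
The bullets above describe the unified calling surface. As a rough, non-authoritative sketch (model names are illustrative placeholders and assume the matching API keys are set in the environment):

```python
# Minimal sketch: the same call shape works across providers and the
# response always comes back in OpenAI format.
import litellm

messages = [{"role": "user", "content": "Say hello in one sentence."}]

# OpenAI-hosted model (assumes OPENAI_API_KEY is set)
openai_resp = litellm.completion(model="gpt-4o-mini", messages=messages)

# Anthropic-hosted model (assumes ANTHROPIC_API_KEY is set)
anthropic_resp = litellm.completion(model="claude-3-haiku-20240307", messages=messages)

# Both responses expose the same OpenAI-style fields.
print(openai_resp.choices[0].message.content)
print(anthropic_resp.choices[0].message.content)
```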
|
||||
|
||||
## REPOSITORY STRUCTURE
|
||||
|
||||
### Core Components
|
||||
- `litellm/` - Main library code
|
||||
- `llms/` - Provider-specific implementations (OpenAI, Anthropic, Azure, etc.)
|
||||
- `proxy/` - Proxy server implementation (LLM Gateway)
|
||||
- `router_utils/` - Load balancing and fallback logic
|
||||
- `types/` - Type definitions and schemas
|
||||
- `integrations/` - Third-party integrations (observability, caching, etc.)
|
||||
|
||||
### Key Directories
|
||||
- `tests/` - Comprehensive test suites
|
||||
- `docs/my-website/` - Documentation website
|
||||
- `ui/litellm-dashboard/` - Admin dashboard UI
|
||||
- `enterprise/` - Enterprise-specific features
|
||||
|
||||
## DEVELOPMENT GUIDELINES
|
||||
|
||||
### MAKING CODE CHANGES
|
||||
|
||||
1. **Provider Implementations**: When adding/modifying LLM providers:
|
||||
- Follow existing patterns in `litellm/llms/{provider}/`
|
||||
- Implement proper transformation classes that inherit from `BaseConfig`
|
||||
- Support both sync and async operations
|
||||
- Handle streaming responses appropriately
|
||||
- Include proper error handling with provider-specific exceptions
|
||||
|
||||
2. **Type Safety**:
|
||||
- Use proper type hints throughout
|
||||
- Update type definitions in `litellm/types/`
|
||||
- Ensure compatibility with both Pydantic v1 and v2
|
||||
|
||||
3. **Testing**:
|
||||
- Add tests in appropriate `tests/` subdirectories
|
||||
- Include both unit tests and integration tests
|
||||
- Test provider-specific functionality thoroughly
|
||||
- Consider adding load tests for performance-critical changes
|
||||
|
||||
### IMPORTANT PATTERNS
|
||||
|
||||
1. **Function/Tool Calling**:
|
||||
- LiteLLM standardizes tool calling across providers
|
||||
- OpenAI format is the standard, with transformations for other providers (see the tool-calling sketch after this list)
|
||||
- See `litellm/llms/anthropic/chat/transformation.py` for complex tool handling
|
||||
|
||||
2. **Streaming**:
|
||||
- All providers should support streaming where possible
|
||||
- Use consistent chunk formatting across providers
|
||||
- Handle both sync and async streaming
|
||||
|
||||
3. **Error Handling**:
|
||||
- Use provider-specific exception classes
|
||||
- Maintain consistent error formats across providers
|
||||
- Include proper retry logic and fallback mechanisms
|
||||
|
||||
4. **Configuration**:
|
||||
- Support both environment variables and programmatic configuration
|
||||
- Use `BaseConfig` classes for provider configurations
|
||||
- Allow dynamic parameter passing
|
||||
|
||||
## PROXY SERVER (LLM GATEWAY)
|
||||
|
||||
The proxy server is a critical component that provides:
|
||||
- Authentication and authorization
|
||||
- Rate limiting and budget management
|
||||
- Load balancing across multiple models/deployments
|
||||
- Observability and logging
|
||||
- Admin dashboard UI
|
||||
- Enterprise features
|
||||
|
||||
Key files:
|
||||
- `litellm/proxy/proxy_server.py` - Main server implementation
|
||||
- `litellm/proxy/auth/` - Authentication logic
|
||||
- `litellm/proxy/management_endpoints/` - Admin API endpoints
|
||||
|
||||
## MCP (MODEL CONTEXT PROTOCOL) SUPPORT
|
||||
|
||||
LiteLLM supports MCP for agent workflows:
|
||||
- MCP server integration for tool calling
|
||||
- Transformation between OpenAI and MCP tool formats
|
||||
- Support for external MCP servers (Zapier, Jira, Linear, etc.)
|
||||
- See `litellm/experimental_mcp_client/` and `litellm/proxy/_experimental/mcp_server/`
|
||||
|
||||
## TESTING CONSIDERATIONS
|
||||
|
||||
1. **Provider Tests**: Test against real provider APIs when possible
|
||||
2. **Proxy Tests**: Include authentication, rate limiting, and routing tests
|
||||
3. **Performance Tests**: Load testing for high-throughput scenarios
|
||||
4. **Integration Tests**: End-to-end workflows including tool calling
|
||||
|
||||
## DOCUMENTATION
|
||||
|
||||
- Keep documentation in sync with code changes
|
||||
- Update provider documentation when adding new providers
|
||||
- Include code examples for new features
|
||||
- Update changelog and release notes
|
||||
|
||||
## SECURITY CONSIDERATIONS
|
||||
|
||||
- Handle API keys securely
|
||||
- Validate all inputs, especially for proxy endpoints
|
||||
- Consider rate limiting and abuse prevention
|
||||
- Follow security best practices for authentication
|
||||
|
||||
## ENTERPRISE FEATURES
|
||||
|
||||
- Some features are enterprise-only
|
||||
- Check `enterprise/` directory for enterprise-specific code
|
||||
- Maintain compatibility between open-source and enterprise versions
|
||||
|
||||
## COMMON PITFALLS TO AVOID
|
||||
|
||||
1. **Breaking Changes**: LiteLLM has many users - avoid breaking existing APIs
|
||||
2. **Provider Specifics**: Each provider has unique quirks - handle them properly
|
||||
3. **Rate Limits**: Respect provider rate limits in tests
|
||||
4. **Memory Usage**: Be mindful of memory usage in streaming scenarios
|
||||
5. **Dependencies**: Keep dependencies minimal and well-justified
|
||||
|
||||
## HELPFUL RESOURCES
|
||||
|
||||
- Main documentation: https://docs.litellm.ai/
|
||||
- Provider-specific docs in `docs/my-website/docs/providers/`
|
||||
- Admin UI for testing proxy features
|
||||
|
||||
## WHEN IN DOUBT
|
||||
|
||||
- Follow existing patterns in the codebase
|
||||
- Check similar provider implementations
|
||||
- Ensure comprehensive test coverage
|
||||
- Update documentation appropriately
|
||||
- Consider backward compatibility impact
|
@@ -1,89 +0,0 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Development Commands
|
||||
|
||||
### Installation
|
||||
- `make install-dev` - Install core development dependencies
|
||||
- `make install-proxy-dev` - Install proxy development dependencies with full feature set
|
||||
- `make install-test-deps` - Install all test dependencies
|
||||
|
||||
### Testing
|
||||
- `make test` - Run all tests
|
||||
- `make test-unit` - Run unit tests (tests/test_litellm) with 4 parallel workers
|
||||
- `make test-integration` - Run integration tests (excludes unit tests)
|
||||
- `pytest tests/` - Direct pytest execution
|
||||
|
||||
### Code Quality
|
||||
- `make lint` - Run all linting (Ruff, MyPy, Black, circular imports, import safety)
|
||||
- `make format` - Apply Black code formatting
|
||||
- `make lint-ruff` - Run Ruff linting only
|
||||
- `make lint-mypy` - Run MyPy type checking only
|
||||
|
||||
### Single Test Files
|
||||
- `poetry run pytest tests/path/to/test_file.py -v` - Run specific test file
|
||||
- `poetry run pytest tests/path/to/test_file.py::test_function -v` - Run specific test
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
LiteLLM is a unified interface for 100+ LLM providers with two main components:
|
||||
|
||||
### Core Library (`litellm/`)
|
||||
- **Main entry point**: `litellm/main.py` - Contains core completion() function
|
||||
- **Provider implementations**: `litellm/llms/` - Each provider has its own subdirectory
|
||||
- **Router system**: `litellm/router.py` + `litellm/router_utils/` - Load balancing and fallback logic (see the Router sketch after this list)
|
||||
- **Type definitions**: `litellm/types/` - Pydantic models and type hints
|
||||
- **Integrations**: `litellm/integrations/` - Third-party observability, caching, logging
|
||||
- **Caching**: `litellm/caching/` - Multiple cache backends (Redis, in-memory, S3, etc.)
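
As a hedged sketch of the Router-based load balancing and fallback flow mentioned above (deployment entries, model names, and keys are illustrative placeholders):

```python
# Two deployments registered behind one alias; the Router picks a deployment,
# retries on failure, and can fall back to the other entry.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",  # alias callers use
            "litellm_params": {
                "model": "azure/my-azure-gpt-4o",  # placeholder deployment
                "api_key": "sk-azure-...",
                "api_base": "https://example-resource.openai.azure.com/",
            },
        },
        {
            "model_name": "gpt-4o",  # second deployment behind the same alias
            "litellm_params": {"model": "openai/gpt-4o", "api_key": "sk-openai-..."},
        },
    ],
    num_retries=2,
)

response = router.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "hi"}],
)
print(response.choices[0].message.content)
```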
|
||||
|
||||
### Proxy Server (`litellm/proxy/`)
|
||||
- **Main server**: `proxy_server.py` - FastAPI application
|
||||
- **Authentication**: `auth/` - API key management, JWT, OAuth2
|
||||
- **Database**: `db/` - Prisma ORM with PostgreSQL/SQLite support
|
||||
- **Management endpoints**: `management_endpoints/` - Admin APIs for keys, teams, models
|
||||
- **Pass-through endpoints**: `pass_through_endpoints/` - Provider-specific API forwarding
|
||||
- **Guardrails**: `guardrails/` - Safety and content filtering hooks
|
||||
- **UI Dashboard**: Served from `_experimental/out/` (Next.js build)
|
||||
|
||||
## Key Patterns
|
||||
|
||||
### Provider Implementation
|
||||
- Providers inherit from base classes in `litellm/llms/base.py`
|
||||
- Each provider has transformation functions for input/output formatting
|
||||
- Support both sync and async operations
|
||||
- Handle streaming responses and function calling
|
||||
|
||||
### Error Handling
|
||||
- Provider-specific exceptions mapped to OpenAI-compatible errors (see the sketch after this list)
|
||||
- Fallback logic handled by Router system
|
||||
- Comprehensive logging through `litellm/_logging.py`
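
A minimal sketch of the exception-mapping behaviour described above (the model name is a placeholder; a real call requires the corresponding API key):

```python
# Provider errors surface as OpenAI-compatible exception classes,
# so callers can handle failures uniformly across backends.
import litellm

try:
    litellm.completion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.RateLimitError:
    # A provider 429 is normalized to RateLimitError regardless of backend;
    # Router-level fallbacks would normally retry another deployment here.
    pass
except litellm.APIConnectionError as e:
    # Network-level failures are also mapped to a consistent exception type.
    print(f"connection error: {e}")
```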
|
||||
|
||||
### Configuration
|
||||
- YAML config files for proxy server (see `proxy/example_config_yaml/`)
|
||||
- Environment variables for API keys and settings
|
||||
- Database schema managed via Prisma (`proxy/schema.prisma`)
|
||||
|
||||
## Development Notes
|
||||
|
||||
### Code Style
|
||||
- Uses Black formatter, Ruff linter, MyPy type checker
|
||||
- Pydantic v2 for data validation
|
||||
- Async/await patterns throughout
|
||||
- Type hints required for all public APIs
|
||||
|
||||
### Testing Strategy
|
||||
- Unit tests in `tests/test_litellm/`
|
||||
- Integration tests for each provider in `tests/llm_translation/`
|
||||
- Proxy tests in `tests/proxy_unit_tests/`
|
||||
- Load tests in `tests/load_tests/`
|
||||
|
||||
### Database Migrations
|
||||
- Prisma handles schema migrations
|
||||
- Migration files auto-generated with `prisma migrate dev`
|
||||
- Always test migrations against both PostgreSQL and SQLite
|
||||
|
||||
### Enterprise Features
|
||||
- Enterprise-specific code in `enterprise/` directory
|
||||
- Optional features enabled via environment variables
|
||||
- Separate licensing and authentication for enterprise features
|
@@ -1,275 +0,0 @@
|
||||
# Contributing to LiteLLM
|
||||
|
||||
Thank you for your interest in contributing to LiteLLM! We welcome contributions of all kinds - from bug fixes and documentation improvements to new features and integrations.
|
||||
|
||||
## **Checklist before submitting a PR**
|
||||
|
||||
Here are the core requirements for any PR submitted to LiteLLM:
|
||||
|
||||
- [ ] **Sign the Contributor License Agreement (CLA)** - [see details](#contributor-license-agreement-cla)
|
||||
- [ ] **Add testing** - Adding at least 1 test is a hard requirement - [see details](#adding-testing)
|
||||
- [ ] **Ensure your PR passes all checks**:
|
||||
- [ ] [Unit Tests](#running-unit-tests) - `make test-unit`
|
||||
- [ ] [Linting / Formatting](#running-linting-and-formatting-checks) - `make lint`
|
||||
- [ ] **Keep scope isolated** - Your changes should address 1 specific problem at a time
|
||||
|
||||
## **Contributor License Agreement (CLA)**
|
||||
|
||||
Before contributing code to LiteLLM, you must sign our [Contributor License Agreement (CLA)](https://cla-assistant.io/BerriAI/litellm). This is a legal requirement for all contributions to be merged into the main repository.
|
||||
|
||||
**Important:** We strongly recommend reviewing and signing the CLA before starting work on your contribution to avoid any delays in the PR process.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Setup Your Local Development Environment
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/BerriAI/litellm.git
|
||||
cd litellm
|
||||
|
||||
# Create a new branch for your feature
|
||||
git checkout -b your-feature-branch
|
||||
|
||||
# Install development dependencies
|
||||
make install-dev
|
||||
|
||||
# Verify your setup works
|
||||
make help
|
||||
```
|
||||
|
||||
That's it! Your local development environment is ready.
|
||||
|
||||
### 2. Development Workflow
|
||||
|
||||
Here's the recommended workflow for making changes:
|
||||
|
||||
```bash
|
||||
# Make your changes to the code
|
||||
# ...
|
||||
|
||||
# Format your code (auto-fixes formatting issues)
|
||||
make format
|
||||
|
||||
# Run all linting checks (matches CI exactly)
|
||||
make lint
|
||||
|
||||
# Run unit tests to ensure nothing is broken
|
||||
make test-unit
|
||||
|
||||
# Commit your changes
|
||||
git add .
|
||||
git commit -m "Your descriptive commit message"
|
||||
|
||||
# Push and create a PR
|
||||
git push origin your-feature-branch
|
||||
```
|
||||
|
||||
## Adding Testing
|
||||
|
||||
**Adding at least 1 test is a hard requirement for all PRs.**
|
||||
|
||||
### Where to Add Tests
|
||||
|
||||
Add your tests to the [`tests/test_litellm/` directory](https://github.com/BerriAI/litellm/tree/main/tests/test_litellm).
|
||||
|
||||
- This directory mirrors the structure of the `litellm/` directory
|
||||
- **Only add mocked tests** - no real LLM API calls in this directory
|
||||
- For integration tests with real APIs, use the appropriate test directories
|
||||
|
||||
### File Naming Convention
|
||||
|
||||
The `tests/test_litellm/` directory follows the same structure as `litellm/`:
|
||||
|
||||
- `litellm/proxy/caching_routes.py` → `tests/test_litellm/proxy/test_caching_routes.py`
|
||||
- `litellm/utils.py` → `tests/test_litellm/test_utils.py`
|
||||
|
||||
### Example Test
|
||||
|
||||
```python
|
||||
import pytest
|
||||
from litellm import completion
|
||||
|
||||
def test_your_feature():
|
||||
"""Test your feature with a descriptive docstring."""
|
||||
# Arrange
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
|
||||
# Act
|
||||
# Use mocked responses, not real API calls
|
||||
|
||||
# Assert
|
||||
assert expected_result == actual_result
|
||||
```
|
||||
|
||||
## Running Tests and Checks
|
||||
|
||||
### Running Unit Tests
|
||||
|
||||
Run all unit tests (uses parallel execution for speed):
|
||||
|
||||
```bash
|
||||
make test-unit
|
||||
```
|
||||
|
||||
Run specific test files:
|
||||
```bash
|
||||
poetry run pytest tests/test_litellm/test_your_file.py -v
|
||||
```
|
||||
|
||||
### Running Linting and Formatting Checks
|
||||
|
||||
Run all linting checks (matches CI exactly):
|
||||
|
||||
```bash
|
||||
make lint
|
||||
```
|
||||
|
||||
Individual linting commands:
|
||||
```bash
|
||||
make format-check # Check Black formatting
|
||||
make lint-ruff # Run Ruff linting
|
||||
make lint-mypy # Run MyPy type checking
|
||||
make check-circular-imports # Check for circular imports
|
||||
make check-import-safety # Check import safety
|
||||
```
|
||||
|
||||
Apply formatting (auto-fixes issues):
|
||||
```bash
|
||||
make format
|
||||
```
|
||||
|
||||
### CI Compatibility
|
||||
|
||||
To ensure your changes will pass CI, run the exact same checks locally:
|
||||
|
||||
```bash
|
||||
# This runs the same checks as the GitHub workflows
|
||||
make lint
|
||||
make test-unit
|
||||
```
|
||||
|
||||
For exact CI compatibility (pins OpenAI version like CI):
|
||||
```bash
|
||||
make install-dev-ci # Installs exact CI dependencies
|
||||
```
|
||||
|
||||
## Available Make Commands
|
||||
|
||||
Run `make help` to see all available commands:
|
||||
|
||||
```bash
|
||||
make help # Show all available commands
|
||||
make install-dev # Install development dependencies
|
||||
make install-proxy-dev # Install proxy development dependencies
|
||||
make install-test-deps # Install test dependencies (for running tests)
|
||||
make format # Apply Black code formatting
|
||||
make format-check # Check Black formatting (matches CI)
|
||||
make lint # Run all linting checks
|
||||
make test-unit # Run unit tests
|
||||
make test-integration # Run integration tests
|
||||
make test-unit-helm # Run Helm unit tests
|
||||
```
|
||||
|
||||
## Code Quality Standards
|
||||
|
||||
LiteLLM follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html).
|
||||
|
||||
Our automated quality checks include:
|
||||
- **Black** for consistent code formatting
|
||||
- **Ruff** for linting and code quality
|
||||
- **MyPy** for static type checking
|
||||
- **Circular import detection**
|
||||
- **Import safety validation**
|
||||
|
||||
All checks must pass before your PR can be merged.
|
||||
|
||||
## Common Issues and Solutions
|
||||
|
||||
### 1. Linting Failures
|
||||
|
||||
If `make lint` fails:
|
||||
|
||||
1. **Formatting issues**: Run `make format` to auto-fix
|
||||
2. **Ruff issues**: Check the output and fix manually
|
||||
3. **MyPy issues**: Add proper type hints
|
||||
4. **Circular imports**: Refactor import dependencies
|
||||
5. **Import safety**: Fix any unprotected imports
|
||||
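For the MyPy case above, a minimal sketch of the kind of fix it usually asks for (the helper below is hypothetical, not from the codebase):

```python
from typing import Optional


# Before: `def get_model_name(model=None):` - MyPy flags the missing annotations.
def get_model_name(model: Optional[str] = None) -> str:
    """Return the requested model name, falling back to a default."""
    return model or "openai/gpt-4o"
```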
|
||||
### 2. Test Failures
|
||||
|
||||
If `make test-unit` fails:
|
||||
|
||||
1. Check if you broke existing functionality
|
||||
2. Add tests for your new code
|
||||
3. Ensure tests use mocks, not real API calls
|
||||
4. Check test file naming conventions
|
||||
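A minimal sketch of point 3 above, using only the standard library's `unittest.mock` so no real API call is made (the test name and response shape are illustrative):

```python
from unittest.mock import patch

import litellm


def test_completion_is_mocked():
    """Illustrative only: patch litellm.completion so no network call happens."""
    fake_response = {"choices": [{"message": {"content": "mocked"}}]}
    with patch.object(litellm, "completion", return_value=fake_response) as mock_call:
        result = litellm.completion(
            model="openai/gpt-4o",
            messages=[{"role": "user", "content": "Hello"}],
        )
    mock_call.assert_called_once()
    assert result["choices"][0]["message"]["content"] == "mocked"
```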
|
||||
### 3. Common Development Tips
|
||||
|
||||
- **Use type hints**: MyPy requires proper type annotations
|
||||
- **Write descriptive commit messages**: Help reviewers understand your changes
|
||||
- **Keep PRs focused**: One feature/fix per PR
|
||||
- **Test edge cases**: Don't just test the happy path
|
||||
- **Update documentation**: If you change APIs, update docs
|
||||
|
||||
## Building and Running Locally
|
||||
|
||||
### LiteLLM Proxy Server
|
||||
|
||||
To run the proxy server locally:
|
||||
|
||||
```bash
|
||||
# Install proxy dependencies
|
||||
make install-proxy-dev
|
||||
|
||||
# Start the proxy server
|
||||
poetry run litellm --config your_config.yaml
|
||||
```
|
||||
|
||||
### Docker Development
|
||||
|
||||
If you want to build the Docker image yourself:
|
||||
|
||||
```bash
|
||||
# Build using the non-root Dockerfile
|
||||
docker build -f docker/Dockerfile.non_root -t litellm_dev .
|
||||
|
||||
# Run with your config
|
||||
docker run \
|
||||
-v $(pwd)/proxy_config.yaml:/app/config.yaml \
|
||||
-e LITELLM_MASTER_KEY="sk-1234" \
|
||||
-p 4000:4000 \
|
||||
litellm_dev \
|
||||
--config /app/config.yaml --detailed_debug
|
||||
```
|
||||
|
||||
## Submitting Your PR
|
||||
|
||||
1. **Push your branch**: `git push origin your-feature-branch`
|
||||
2. **Create a PR**: Go to GitHub and create a pull request
|
||||
3. **Fill out the PR template**: Provide clear description of changes
|
||||
4. **Wait for review**: Maintainers will review and provide feedback
|
||||
5. **Address feedback**: Make requested changes and push updates
|
||||
6. **Merge**: Once approved, your PR will be merged!
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you need help:
|
||||
|
||||
- 💬 [Join our Discord](https://discord.gg/wuPM9dRgDw)
|
||||
- 💬 [Join our Slack](https://join.slack.com/share/enQtOTE0ODczMzk2Nzk4NC01YjUxNjY2YjBlYTFmNDRiZTM3NDFiYTM3MzVkODFiMDVjOGRjMmNmZTZkZTMzOWQzZGQyZWIwYjQ0MWExYmE3)
|
||||
- 📧 Email us: ishaan@berri.ai / krrish@berri.ai
|
||||
- 🐛 [Create an issue](https://github.com/BerriAI/litellm/issues/new)
|
||||
|
||||
## What to Contribute
|
||||
|
||||
Looking for ideas? Check out:
|
||||
|
||||
- 🐛 [Good first issues](https://github.com/BerriAI/litellm/labels/good%20first%20issue)
|
||||
- 🚀 [Feature requests](https://github.com/BerriAI/litellm/labels/enhancement)
|
||||
- 📚 Documentation improvements
|
||||
- 🧪 Test coverage improvements
|
||||
- 🔌 New LLM provider integrations
|
||||
|
||||
Thank you for contributing to LiteLLM! 🚀
|
@@ -1,84 +0,0 @@
|
||||
# Base image for building
|
||||
ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/python:latest-dev
|
||||
|
||||
# Runtime image
|
||||
ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/python:latest-dev
|
||||
# Builder stage
|
||||
FROM $LITELLM_BUILD_IMAGE AS builder
|
||||
|
||||
# Set the working directory to /app
|
||||
WORKDIR /app
|
||||
|
||||
USER root
|
||||
|
||||
# Install build dependencies
|
||||
RUN apk add --no-cache gcc python3-dev openssl openssl-dev
|
||||
|
||||
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install build
|
||||
|
||||
# Copy the current directory contents into the container at /app
|
||||
COPY . .
|
||||
|
||||
# Build Admin UI
|
||||
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
|
||||
|
||||
# Build the package
|
||||
RUN rm -rf dist/* && python -m build
|
||||
|
||||
# There should be only one wheel file now, assume the build only creates one
|
||||
RUN ls -1 dist/*.whl | head -1
|
||||
|
||||
# Install the package
|
||||
RUN pip install dist/*.whl
|
||||
|
||||
# install dependencies as wheels
|
||||
RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
|
||||
|
||||
# ensure pyjwt is used, not jwt
|
||||
RUN pip uninstall jwt -y
|
||||
RUN pip uninstall PyJWT -y
|
||||
RUN pip install PyJWT==2.9.0 --no-cache-dir
|
||||
|
||||
# Build Admin UI
|
||||
RUN chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
|
||||
|
||||
# Runtime stage
|
||||
FROM $LITELLM_RUNTIME_IMAGE AS runtime
|
||||
|
||||
# Ensure runtime stage runs as root
|
||||
USER root
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apk add --no-cache openssl tzdata
|
||||
|
||||
WORKDIR /app
|
||||
# Copy the current directory contents into the container at /app
|
||||
COPY . .
|
||||
RUN ls -la /app
|
||||
|
||||
# Copy the built wheel from the builder stage to the runtime stage; assumes only one wheel file is present
|
||||
COPY --from=builder /app/dist/*.whl .
|
||||
COPY --from=builder /wheels/ /wheels/
|
||||
|
||||
# Install the built wheel using pip; again using a wildcard if it's the only file
|
||||
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
|
||||
|
||||
# Install semantic_router without dependencies
|
||||
RUN pip install semantic_router --no-deps
|
||||
|
||||
# Generate prisma client
|
||||
RUN prisma generate
|
||||
RUN chmod +x docker/entrypoint.sh
|
||||
RUN chmod +x docker/prod_entrypoint.sh
|
||||
|
||||
EXPOSE 4000/tcp
|
||||
|
||||
RUN apk add --no-cache supervisor
|
||||
COPY docker/supervisord.conf /etc/supervisord.conf
|
||||
|
||||
ENTRYPOINT ["docker/prod_entrypoint.sh"]
|
||||
|
||||
# Append "--detailed_debug" to the end of CMD to view detailed debug logs
|
||||
CMD ["--port", "4000"]
|
@@ -1,89 +0,0 @@
|
||||
# GEMINI.md
|
||||
|
||||
This file provides guidance to Gemini when working with code in this repository.
|
||||
|
||||
## Development Commands
|
||||
|
||||
### Installation
|
||||
- `make install-dev` - Install core development dependencies
|
||||
- `make install-proxy-dev` - Install proxy development dependencies with full feature set
|
||||
- `make install-test-deps` - Install all test dependencies
|
||||
|
||||
### Testing
|
||||
- `make test` - Run all tests
|
||||
- `make test-unit` - Run unit tests (tests/test_litellm) with 4 parallel workers
|
||||
- `make test-integration` - Run integration tests (excludes unit tests)
|
||||
- `pytest tests/` - Direct pytest execution
|
||||
|
||||
### Code Quality
|
||||
- `make lint` - Run all linting (Ruff, MyPy, Black, circular imports, import safety)
|
||||
- `make format` - Apply Black code formatting
|
||||
- `make lint-ruff` - Run Ruff linting only
|
||||
- `make lint-mypy` - Run MyPy type checking only
|
||||
|
||||
### Single Test Files
|
||||
- `poetry run pytest tests/path/to/test_file.py -v` - Run specific test file
|
||||
- `poetry run pytest tests/path/to/test_file.py::test_function -v` - Run specific test
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
LiteLLM is a unified interface for 100+ LLM providers with two main components:
|
||||
|
||||
### Core Library (`litellm/`)
|
||||
- **Main entry point**: `litellm/main.py` - Contains core completion() function
|
||||
- **Provider implementations**: `litellm/llms/` - Each provider has its own subdirectory
|
||||
- **Router system**: `litellm/router.py` + `litellm/router_utils/` - Load balancing and fallback logic
|
||||
- **Type definitions**: `litellm/types/` - Pydantic models and type hints
|
||||
- **Integrations**: `litellm/integrations/` - Third-party observability, caching, logging
|
||||
- **Caching**: `litellm/caching/` - Multiple cache backends (Redis, in-memory, S3, etc.)
|
||||
|
||||
### Proxy Server (`litellm/proxy/`)
|
||||
- **Main server**: `proxy_server.py` - FastAPI application
|
||||
- **Authentication**: `auth/` - API key management, JWT, OAuth2
|
||||
- **Database**: `db/` - Prisma ORM with PostgreSQL/SQLite support
|
||||
- **Management endpoints**: `management_endpoints/` - Admin APIs for keys, teams, models
|
||||
- **Pass-through endpoints**: `pass_through_endpoints/` - Provider-specific API forwarding
|
||||
- **Guardrails**: `guardrails/` - Safety and content filtering hooks
|
||||
- **UI Dashboard**: Served from `_experimental/out/` (Next.js build)
|
||||
|
||||
## Key Patterns
|
||||
|
||||
### Provider Implementation
|
||||
- Providers inherit from base classes in `litellm/llms/base.py`
|
||||
- Each provider has transformation functions for input/output formatting
|
||||
- Support both sync and async operations
|
||||
- Handle streaming responses and function calling
|
||||
|
||||
### Error Handling
|
||||
- Provider-specific exceptions mapped to OpenAI-compatible errors
|
||||
- Fallback logic handled by Router system
|
||||
- Comprehensive logging through `litellm/_logging.py`
|
||||
|
||||
### Configuration
|
||||
- YAML config files for proxy server (see `proxy/example_config_yaml/`)
|
||||
- Environment variables for API keys and settings
|
||||
- Database schema managed via Prisma (`proxy/schema.prisma`)
|
||||
|
||||
## Development Notes
|
||||
|
||||
### Code Style
|
||||
- Uses Black formatter, Ruff linter, MyPy type checker
|
||||
- Pydantic v2 for data validation
|
||||
- Async/await patterns throughout
|
||||
- Type hints required for all public APIs
|
||||
|
||||
### Testing Strategy
|
||||
- Unit tests in `tests/test_litellm/`
|
||||
- Integration tests for each provider in `tests/llm_translation/`
|
||||
- Proxy tests in `tests/proxy_unit_tests/`
|
||||
- Load tests in `tests/load_tests/`
|
||||
|
||||
### Database Migrations
|
||||
- Prisma handles schema migrations
|
||||
- Migration files auto-generated with `prisma migrate dev`
|
||||
- Always test migrations against both PostgreSQL and SQLite
|
||||
|
||||
### Enterprise Features
|
||||
- Enterprise-specific code in `enterprise/` directory
|
||||
- Optional features enabled via environment variables
|
||||
- Separate licensing and authentication for enterprise features
|
@@ -1,26 +0,0 @@
|
||||
Portions of this software are licensed as follows:
|
||||
|
||||
* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
|
||||
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
|
||||
---
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 Berri AI
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@@ -1,103 +0,0 @@
|
||||
# LiteLLM Makefile
|
||||
# Simple Makefile for running tests and basic development tasks
|
||||
|
||||
.PHONY: help test test-unit test-integration test-unit-helm lint format install-dev install-proxy-dev install-test-deps install-helm-unittest check-circular-imports check-import-safety
|
||||
|
||||
# Default target
|
||||
help:
|
||||
@echo "Available commands:"
|
||||
@echo " make install-dev - Install development dependencies"
|
||||
@echo " make install-proxy-dev - Install proxy development dependencies"
|
||||
@echo " make install-dev-ci - Install dev dependencies (CI-compatible, pins OpenAI)"
|
||||
@echo " make install-proxy-dev-ci - Install proxy dev dependencies (CI-compatible)"
|
||||
@echo " make install-test-deps - Install test dependencies"
|
||||
@echo " make install-helm-unittest - Install helm unittest plugin"
|
||||
@echo " make format - Apply Black code formatting"
|
||||
@echo " make format-check - Check Black code formatting (matches CI)"
|
||||
@echo " make lint - Run all linting (Ruff, MyPy, Black check, circular imports, import safety)"
|
||||
@echo " make lint-ruff - Run Ruff linting only"
|
||||
@echo " make lint-mypy - Run MyPy type checking only"
|
||||
@echo " make lint-black - Check Black formatting (matches CI)"
|
||||
@echo " make check-circular-imports - Check for circular imports"
|
||||
@echo " make check-import-safety - Check import safety"
|
||||
@echo " make test - Run all tests"
|
||||
@echo " make test-unit - Run unit tests (tests/test_litellm)"
|
||||
@echo " make test-integration - Run integration tests"
|
||||
@echo " make test-unit-helm - Run helm unit tests"
|
||||
|
||||
# Installation targets
|
||||
install-dev:
|
||||
poetry install --with dev
|
||||
|
||||
install-proxy-dev:
|
||||
poetry install --with dev,proxy-dev --extras proxy
|
||||
|
||||
# CI-compatible installations (matches GitHub workflows exactly)
|
||||
install-dev-ci:
|
||||
pip install openai==1.99.5
|
||||
poetry install --with dev
|
||||
pip install openai==1.99.5
|
||||
|
||||
install-proxy-dev-ci:
|
||||
poetry install --with dev,proxy-dev --extras proxy
|
||||
pip install openai==1.99.5
|
||||
|
||||
install-test-deps: install-proxy-dev
|
||||
poetry run pip install "pytest-retry==1.6.3"
|
||||
poetry run pip install pytest-xdist
|
||||
cd enterprise && python -m pip install -e . && cd ..
|
||||
|
||||
install-helm-unittest:
|
||||
helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4
|
||||
|
||||
# Formatting
|
||||
format: install-dev
|
||||
cd litellm && poetry run black . && cd ..
|
||||
|
||||
format-check: install-dev
|
||||
cd litellm && poetry run black --check . && cd ..
|
||||
|
||||
# Linting targets
|
||||
lint-ruff: install-dev
|
||||
cd litellm && poetry run ruff check . && cd ..
|
||||
|
||||
lint-mypy: install-dev
|
||||
poetry run pip install types-requests types-setuptools types-redis types-PyYAML
|
||||
cd litellm && poetry run mypy . --ignore-missing-imports && cd ..
|
||||
|
||||
lint-black: format-check
|
||||
|
||||
check-circular-imports: install-dev
|
||||
cd litellm && poetry run python ../tests/documentation_tests/test_circular_imports.py && cd ..
|
||||
|
||||
check-import-safety: install-dev
|
||||
poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
||||
|
||||
# Combined linting (matches test-linting.yml workflow)
|
||||
lint: format-check lint-ruff lint-mypy check-circular-imports check-import-safety
|
||||
|
||||
# Testing targets
|
||||
test:
|
||||
poetry run pytest tests/
|
||||
|
||||
test-unit: install-test-deps
|
||||
poetry run pytest tests/test_litellm -x -vv -n 4
|
||||
|
||||
test-integration:
|
||||
poetry run pytest tests/ -k "not test_litellm"
|
||||
|
||||
test-unit-helm: install-helm-unittest
|
||||
helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
|
||||
|
||||
# LLM Translation testing targets
|
||||
test-llm-translation: install-test-deps
|
||||
@echo "Running LLM translation tests..."
|
||||
@python .github/workflows/run_llm_translation_tests.py
|
||||
|
||||
test-llm-translation-single: install-test-deps
|
||||
@echo "Running single LLM translation test file..."
|
||||
@if [ -z "$(FILE)" ]; then echo "Usage: make test-llm-translation-single FILE=test_filename.py"; exit 1; fi
|
||||
@mkdir -p test-results
|
||||
poetry run pytest tests/llm_translation/$(FILE) \
|
||||
--junitxml=test-results/junit.xml \
|
||||
-v --tb=short --maxfail=100 --timeout=300
|
@@ -1,448 +0,0 @@
|
||||
<h1 align="center">
|
||||
🚅 LiteLLM
|
||||
</h1>
|
||||
<p align="center">
|
||||
<p align="center">
|
||||
<a href="https://render.com/deploy?repo=https://github.com/BerriAI/litellm" target="_blank" rel="nofollow"><img src="https://render.com/images/deploy-to-render-button.svg" alt="Deploy to Render"></a>
|
||||
<a href="https://railway.app/template/HLP0Ub?referralCode=jch2ME">
|
||||
<img src="https://railway.app/button.svg" alt="Deploy on Railway">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
|
||||
<br>
|
||||
</p>
|
||||
<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
|
||||
<h4 align="center">
|
||||
<a href="https://pypi.org/project/litellm/" target="_blank">
|
||||
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
|
||||
</a>
|
||||
<a href="https://www.ycombinator.com/companies/berriai">
|
||||
<img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
|
||||
</a>
|
||||
<a href="https://wa.link/huol9n">
|
||||
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=WhatsApp&color=success&logo=WhatsApp&style=flat-square" alt="Whatsapp">
|
||||
</a>
|
||||
<a href="https://discord.gg/wuPM9dRgDw">
|
||||
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=Discord&color=blue&logo=Discord&style=flat-square" alt="Discord">
|
||||
</a>
|
||||
<a href="https://join.slack.com/share/enQtOTE0ODczMzk2Nzk4NC01YjUxNjY2YjBlYTFmNDRiZTM3NDFiYTM3MzVkODFiMDVjOGRjMmNmZTZkZTMzOWQzZGQyZWIwYjQ0MWExYmE3">
|
||||
<img src="https://img.shields.io/static/v1?label=Chat%20on&message=Slack&color=black&logo=Slack&style=flat-square" alt="Slack">
|
||||
</a>
|
||||
</h4>
|
||||
|
||||
LiteLLM manages:
|
||||
|
||||
- Translate inputs to the provider's `completion`, `embedding`, and `image_generation` endpoints
|
||||
- [Consistent output](https://docs.litellm.ai/docs/completion/output): text responses will always be available at `['choices'][0]['message']['content']`
|
||||
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
|
||||
- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)
|
||||
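As a companion to the Router bullet above, a minimal sketch of routing one model name across two deployments (the keys and endpoints are placeholders):

```python
from litellm import Router

# two deployments behind one public model name; the Router load-balances and falls back between them
router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {"model": "azure/gpt-4o", "api_key": "<azure-key>", "api_base": "<azure-endpoint>"},
        },
        {
            "model_name": "gpt-4o",
            "litellm_params": {"model": "openai/gpt-4o", "api_key": "<openai-key>"},
        },
    ]
)

response = router.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
```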
|
||||
[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
|
||||
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
|
||||
|
||||
🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12-hour load tests before being published. [More information about the release cycle here](https://docs.litellm.ai/docs/proxy/release_cycle)
|
||||
|
||||
Need support for more providers? If a provider or LLM platform is missing, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
|
||||
|
||||
# Usage ([**Docs**](https://docs.litellm.ai/docs/))
|
||||
|
||||
> [!IMPORTANT]
|
||||
> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)
|
||||
> LiteLLM v1.40.14+ now requires `pydantic>=2.0.0`. No changes required.
|
||||
|
||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
|
||||
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
||||
</a>
|
||||
|
||||
```shell
|
||||
pip install litellm
|
||||
```
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
import os
|
||||
|
||||
## set ENV variables
|
||||
os.environ["OPENAI_API_KEY"] = "your-openai-key"
|
||||
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"
|
||||
|
||||
messages = [{ "content": "Hello, how are you?","role": "user"}]
|
||||
|
||||
# openai call
|
||||
response = completion(model="openai/gpt-4o", messages=messages)
|
||||
|
||||
# anthropic call
|
||||
response = completion(model="anthropic/claude-sonnet-4-20250514", messages=messages)
|
||||
print(response)
|
||||
```
|
||||
|
||||
### Response (OpenAI Format)
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-1214900a-6cdd-4148-b663-b5e2f642b4de",
|
||||
"created": 1751494488,
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": null,
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"content": "Hello! I'm doing well, thank you for asking. I'm here and ready to help with whatever you'd like to discuss or work on. How are you doing today?",
|
||||
"role": "assistant",
|
||||
"tool_calls": null,
|
||||
"function_call": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"completion_tokens": 39,
|
||||
"prompt_tokens": 13,
|
||||
"total_tokens": 52,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": null,
|
||||
"cached_tokens": 0
|
||||
},
|
||||
"cache_creation_input_tokens": 0,
|
||||
"cache_read_input_tokens": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Call any model supported by a provider, with `model=<provider_name>/<model_name>`. There might be provider-specific details here, so refer to [provider docs for more information](https://docs.litellm.ai/docs/providers)
|
||||
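For example (the model identifiers below are illustrative; check each provider page for the exact names):

```python
from litellm import completion

messages = [{"role": "user", "content": "Hello, how are you?"}]

# same call shape for every provider - only the "<provider_name>/<model_name>" prefix changes
response = completion(model="mistral/mistral-small-latest", messages=messages)
response = completion(model="gemini/gemini-1.5-flash", messages=messages)
```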
|
||||
## Async ([Docs](https://docs.litellm.ai/docs/completion/stream#async-completion))
|
||||
|
||||
```python
|
||||
from litellm import acompletion
|
||||
import asyncio
|
||||
|
||||
async def test_get_response():
|
||||
user_message = "Hello, how are you?"
|
||||
messages = [{"content": user_message, "role": "user"}]
|
||||
response = await acompletion(model="openai/gpt-4o", messages=messages)
|
||||
return response
|
||||
|
||||
response = asyncio.run(test_get_response())
|
||||
print(response)
|
||||
```
|
||||
|
||||
## Streaming ([Docs](https://docs.litellm.ai/docs/completion/stream))
|
||||
|
||||
LiteLLM supports streaming the model response back; pass `stream=True` to get a streaming iterator in the response.
|
||||
Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
response = completion(model="openai/gpt-4o", messages=messages, stream=True)
|
||||
for part in response:
|
||||
print(part.choices[0].delta.content or "")
|
||||
|
||||
# claude sonnet 4
|
||||
response = completion('anthropic/claude-sonnet-4-20250514', messages, stream=True)
|
||||
for part in response:
|
||||
print(part)
|
||||
```
|
||||
|
||||
### Response chunk (OpenAI Format)
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-fe575c37-5004-4926-ae5e-bfbc31f356ca",
|
||||
"created": 1751494808,
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"object": "chat.completion.chunk",
|
||||
"system_fingerprint": null,
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"delta": {
|
||||
"provider_specific_fields": null,
|
||||
"content": "Hello",
|
||||
"role": "assistant",
|
||||
"function_call": null,
|
||||
"tool_calls": null,
|
||||
"audio": null
|
||||
},
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"provider_specific_fields": null,
|
||||
"stream_options": null,
|
||||
"citations": null
|
||||
}
|
||||
```
|
||||
|
||||
## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
|
||||
|
||||
LiteLLM exposes predefined callbacks to send data to Lunary, MLflow, Langfuse, DynamoDB, S3 buckets, Helicone, Promptlayer, Traceloop, Athina, and Slack
|
||||
|
||||
```python
|
||||
import os

import litellm
from litellm import completion
|
||||
|
||||
## set env variables for logging tools (when using MLflow, no API key set up is required)
|
||||
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
|
||||
os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
|
||||
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
|
||||
os.environ["LANGFUSE_SECRET_KEY"] = ""
|
||||
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
|
||||
|
||||
os.environ["OPENAI_API_KEY"] = "your-openai-key"
|
||||
|
||||
# set callbacks
|
||||
litellm.success_callback = ["lunary", "mlflow", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
|
||||
|
||||
#openai call
|
||||
response = completion(model="openai/gpt-4o", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
|
||||
```
|
||||
|
||||
# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
|
||||
|
||||
Track spend + Load Balance across multiple projects
|
||||
|
||||
[Hosted Proxy (Preview)](https://docs.litellm.ai/docs/hosted)
|
||||
|
||||
The proxy provides:
|
||||
|
||||
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
|
||||
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
|
||||
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
|
||||
4. [Rate Limiting](https://docs.litellm.ai/docs/proxy/users#set-rate-limits)
|
||||
|
||||
## 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/)
|
||||
|
||||
|
||||
## Quick Start Proxy - CLI
|
||||
|
||||
```shell
|
||||
pip install 'litellm[proxy]'
|
||||
```
|
||||
|
||||
### Step 1: Start litellm proxy
|
||||
|
||||
```shell
|
||||
$ litellm --model huggingface/bigcode/starcoder
|
||||
|
||||
#INFO: Proxy running on http://0.0.0.0:4000
|
||||
```
|
||||
|
||||
### Step 2: Make ChatCompletions Request to Proxy
|
||||
|
||||
|
||||
> [!IMPORTANT]
|
||||
> 💡 [Use LiteLLM Proxy with Langchain (Python, JS), OpenAI SDK (Python, JS) Anthropic SDK, Mistral SDK, LlamaIndex, Instructor, Curl](https://docs.litellm.ai/docs/proxy/user_keys)
|
||||
|
||||
```python
|
||||
import openai # openai v1.0.0+
|
||||
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
|
||||
# request sent to model set on litellm proxy, `litellm --model`
|
||||
response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "this is a test request, write a short poem"
|
||||
}
|
||||
])
|
||||
|
||||
print(response)
|
||||
```
|
||||
|
||||
## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys))
|
||||
|
||||
Connect the proxy with a Postgres DB to create proxy keys
|
||||
|
||||
```bash
|
||||
# Get the code
|
||||
git clone https://github.com/BerriAI/litellm
|
||||
|
||||
# Go to folder
|
||||
cd litellm
|
||||
|
||||
# Add the master key - you can change this after setup
|
||||
echo 'LITELLM_MASTER_KEY="sk-1234"' > .env
|
||||
|
||||
# Add the litellm salt key - you cannot change this after adding a model
|
||||
# It is used to encrypt / decrypt your LLM API Key credentials
|
||||
# We recommend - https://1password.com/password-generator/
|
||||
# password generator to get a random hash for litellm salt key
|
||||
echo 'LITELLM_SALT_KEY="sk-1234"' >> .env
|
||||
|
||||
source .env
|
||||
|
||||
# Start
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
|
||||
The UI is available at `/ui` on your proxy server
|
||||

|
||||
|
||||
Set budgets and rate limits across multiple projects
|
||||
`POST /key/generate`
|
||||
|
||||
### Request
|
||||
|
||||
```shell
|
||||
curl 'http://0.0.0.0:4000/key/generate' \
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai", "team": "core-infra"}}'
|
||||
```
|
||||
|
||||
### Expected Response
|
||||
|
||||
```shell
|
||||
{
|
||||
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
|
||||
"expires": "2023-11-19T01:38:25.838000+00:00" # datetime object
|
||||
}
|
||||
```
|
||||
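The generated key is then used like any other API key against the proxy; a short sketch reusing the quick-start client from above (the key value is the example response shown here):

```python
import openai  # openai v1.0.0+

# point the client at the proxy and authenticate with the generated virtual key
client = openai.OpenAI(api_key="sk-kdEXbIqZRwEeEiHwdg7sFA", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # must be one of the models allowed for this key
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
)
print(response.choices[0].message.content)
```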
|
||||
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
|
||||
|
||||
| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
|
||||
|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------|
|
||||
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [Meta - Llama API](https://docs.litellm.ai/docs/providers/meta_llama) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [AI/ML API](https://docs.litellm.ai/docs/providers/aiml) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [empower](https://docs.litellm.ai/docs/providers/empower) | ✅ | ✅ | ✅ | ✅ |
|
||||
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
|
||||
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
|
||||
| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [Galadriel](https://docs.litellm.ai/docs/providers/galadriel) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [GradientAI](https://docs.litellm.ai/docs/providers/gradient_ai) | ✅ | ✅ | | | | |
|
||||
| [Novita AI](https://novita.ai/models/llm?utm_source=github_litellm&utm_medium=github_readme&utm_campaign=github_link) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [Featherless AI](https://docs.litellm.ai/docs/providers/featherless_ai) | ✅ | ✅ | ✅ | ✅ | | |
|
||||
| [Nebius AI Studio](https://docs.litellm.ai/docs/providers/nebius) | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
||||
|
||||
[**Read the Docs**](https://docs.litellm.ai/docs/)
|
||||
|
||||
## Contributing
|
||||
|
||||
Interested in contributing? Contributions to the LiteLLM Python SDK, Proxy Server, and LLM integrations are all accepted and highly encouraged!
|
||||
|
||||
**Quick start:** `git clone` → `make install-dev` → `make format` → `make lint` → `make test-unit`
|
||||
|
||||
See our comprehensive [Contributing Guide (CONTRIBUTING.md)](CONTRIBUTING.md) for detailed instructions.
|
||||
|
||||
# Enterprise
|
||||
For companies that need better security, user management, and professional support
|
||||
|
||||
[Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
|
||||
|
||||
This covers:
|
||||
- ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):**
|
||||
- ✅ **Feature Prioritization**
|
||||
- ✅ **Custom Integrations**
|
||||
- ✅ **Professional Support - Dedicated discord + slack**
|
||||
- ✅ **Custom SLAs**
|
||||
- ✅ **Secure access with Single Sign-On**
|
||||
|
||||
# Contributing
|
||||
|
||||
We welcome contributions to LiteLLM! Whether you're fixing bugs, adding features, or improving documentation, we appreciate your help.
|
||||
|
||||
## Quick Start for Contributors
|
||||
|
||||
This requires poetry to be installed.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/BerriAI/litellm.git
|
||||
cd litellm
|
||||
make install-dev # Install development dependencies
|
||||
make format # Format your code
|
||||
make lint # Run all linting checks
|
||||
make test-unit # Run unit tests
|
||||
make format-check # Check formatting only
|
||||
```
|
||||
|
||||
For detailed contributing guidelines, see [CONTRIBUTING.md](CONTRIBUTING.md).
|
||||
|
||||
## Code Quality / Linting
|
||||
|
||||
LiteLLM follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html).
|
||||
|
||||
Our automated checks include:
|
||||
- **Black** for code formatting
|
||||
- **Ruff** for linting and code quality
|
||||
- **MyPy** for type checking
|
||||
- **Circular import detection**
|
||||
- **Import safety checks**
|
||||
|
||||
|
||||
All these checks must pass before your PR can be merged.
|
||||
|
||||
|
||||
# Support / talk with founders
|
||||
|
||||
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
|
||||
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
|
||||
- [Community Slack 💭](https://join.slack.com/share/enQtOTE0ODczMzk2Nzk4NC01YjUxNjY2YjBlYTFmNDRiZTM3NDFiYTM3MzVkODFiMDVjOGRjMmNmZTZkZTMzOWQzZGQyZWIwYjQ0MWExYmE3)
|
||||
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
|
||||
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
|
||||
|
||||
# Why did we build this
|
||||
|
||||
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.
|
||||
|
||||
# Contributors
|
||||
|
||||
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
|
||||
<!-- prettier-ignore-start -->
|
||||
<!-- markdownlint-disable -->
|
||||
|
||||
<!-- markdownlint-restore -->
|
||||
<!-- prettier-ignore-end -->
|
||||
|
||||
<!-- ALL-CONTRIBUTORS-LIST:END -->
|
||||
|
||||
<a href="https://github.com/BerriAI/litellm/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
|
||||
</a>
|
||||
|
||||
|
||||
## Run in Developer mode
|
||||
### Services
|
||||
1. Set up a `.env` file in the repo root
|
||||
2. Run dependent services: `docker-compose up db prometheus`
|
||||
|
||||
### Backend
|
||||
1. (In root) create virtual environment `python -m venv .venv`
|
||||
2. Activate virtual environment `source .venv/bin/activate`
|
||||
3. Install dependencies `pip install -e ".[all]"`
|
||||
4. Start proxy backend `python3 /path/to/litellm/proxy_cli.py`
|
||||
|
||||
### Frontend
|
||||
1. Navigate to `ui/litellm-dashboard`
|
||||
2. Install dependencies `npm install`
|
||||
3. Run `npm run dev` to start the dashboard
|
@@ -1,60 +0,0 @@
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def create_baseline():
|
||||
"""Create baseline migration in deploy/migrations"""
|
||||
try:
|
||||
# Get paths
|
||||
root_dir = Path(__file__).parent.parent
|
||||
deploy_dir = root_dir / "deploy"
|
||||
migrations_dir = deploy_dir / "migrations"
|
||||
schema_path = root_dir / "schema.prisma"
|
||||
|
||||
# Create migrations directory
|
||||
migrations_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Create migration_lock.toml if it doesn't exist
|
||||
lock_file = migrations_dir / "migration_lock.toml"
|
||||
if not lock_file.exists():
|
||||
lock_file.write_text('provider = "postgresql"\n')
|
||||
|
||||
# Create timestamp-based migration directory
|
||||
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||
migration_dir = migrations_dir / f"{timestamp}_baseline"
|
||||
migration_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Generate migration SQL
|
||||
result = subprocess.run(
|
||||
[
|
||||
"prisma",
|
||||
"migrate",
|
||||
"diff",
|
||||
"--from-empty",
|
||||
"--to-schema-datamodel",
|
||||
str(schema_path),
|
||||
"--script",
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
|
||||
# Write the SQL to migration.sql
|
||||
migration_file = migration_dir / "migration.sql"
|
||||
migration_file.write_text(result.stdout)
|
||||
|
||||
print(f"Created baseline migration in {migration_dir}")
|
||||
return True
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error running prisma command: {e.stderr}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Error creating baseline migration: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
create_baseline()
|
@@ -1,28 +0,0 @@
|
||||
import sys
|
||||
|
||||
|
||||
def check_file_length(max_lines, filenames):
|
||||
bad_files = []
|
||||
for filename in filenames:
|
||||
with open(filename, "r") as file:
|
||||
lines = file.readlines()
|
||||
if len(lines) > max_lines:
|
||||
bad_files.append((filename, len(lines)))
|
||||
return bad_files
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
max_lines = int(sys.argv[1])
|
||||
filenames = sys.argv[2:]
|
||||
|
||||
bad_files = check_file_length(max_lines, filenames)
|
||||
if bad_files:
|
||||
bad_files.sort(
|
||||
key=lambda x: x[1], reverse=True
|
||||
) # Sort files by length in descending order
|
||||
for filename, length in bad_files:
|
||||
print(f"{filename}: {length} lines")
|
||||
|
||||
sys.exit(1)
|
||||
else:
|
||||
sys.exit(0)
|
@@ -1,32 +0,0 @@
|
||||
import sys
|
||||
import filecmp
|
||||
import shutil
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
print(
|
||||
"Comparing model_prices_and_context_window and litellm/model_prices_and_context_window_backup.json files... checking if they match."
|
||||
)
|
||||
|
||||
file1 = "model_prices_and_context_window.json"
|
||||
file2 = "litellm/model_prices_and_context_window_backup.json"
|
||||
|
||||
cmp_result = filecmp.cmp(file1, file2, shallow=False)
|
||||
|
||||
if cmp_result:
|
||||
print(f"Passed! Files {file1} and {file2} match.")
|
||||
return 0
|
||||
else:
|
||||
print(
|
||||
f"Failed! Files {file1} and {file2} do not match. Copying content from {file1} to {file2}."
|
||||
)
|
||||
copy_content(file1, file2)
|
||||
return 1
|
||||
|
||||
|
||||
def copy_content(source, destination):
|
||||
shutil.copy2(source, destination)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
@@ -1,19 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit on error
|
||||
set -e
|
||||
|
||||
echo "🚀 Building and publishing litellm-proxy-extras"
|
||||
|
||||
# Navigate to litellm-proxy-extras directory
|
||||
cd "$(dirname "$0")/../litellm-proxy-extras"
|
||||
|
||||
# Build the package
|
||||
echo "📦 Building package..."
|
||||
poetry build
|
||||
|
||||
# Publish to PyPI
|
||||
echo "🌎 Publishing to PyPI..."
|
||||
poetry publish
|
||||
|
||||
echo "✅ Done! Package published successfully"
|
@@ -1,95 +0,0 @@
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import testing.postgresql
|
||||
import shutil
|
||||
|
||||
|
||||
def create_migration(migration_name: str = None):
|
||||
"""
|
||||
Create a new migration SQL file in the migrations directory by comparing
|
||||
current database state with schema
|
||||
|
||||
Args:
|
||||
migration_name (str): Name for the migration
|
||||
"""
|
||||
try:
|
||||
# Get paths
|
||||
root_dir = Path(__file__).parent.parent
|
||||
migrations_dir = root_dir / "litellm-proxy-extras" / "litellm_proxy_extras" / "migrations"
|
||||
schema_path = root_dir / "schema.prisma"
|
||||
|
||||
# Create temporary PostgreSQL database
|
||||
with testing.postgresql.Postgresql() as postgresql:
|
||||
db_url = postgresql.url()
|
||||
|
||||
# Create temporary migrations directory next to schema.prisma
|
||||
temp_migrations_dir = schema_path.parent / "migrations"
|
||||
|
||||
try:
|
||||
# Copy existing migrations to temp directory
|
||||
if temp_migrations_dir.exists():
|
||||
shutil.rmtree(temp_migrations_dir)
|
||||
shutil.copytree(migrations_dir, temp_migrations_dir)
|
||||
|
||||
# Apply existing migrations to temp database
|
||||
os.environ["DATABASE_URL"] = db_url
|
||||
subprocess.run(
|
||||
["prisma", "migrate", "deploy", "--schema", str(schema_path)],
|
||||
check=True,
|
||||
)
|
||||
|
||||
# Generate diff between current database and schema
|
||||
result = subprocess.run(
|
||||
[
|
||||
"prisma",
|
||||
"migrate",
|
||||
"diff",
|
||||
"--from-url",
|
||||
db_url,
|
||||
"--to-schema-datamodel",
|
||||
str(schema_path),
|
||||
"--script",
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
|
||||
if result.stdout.strip():
|
||||
# Generate timestamp and create migration directory
|
||||
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||
migration_name = migration_name or "unnamed_migration"
|
||||
migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
|
||||
migration_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Write the SQL to migration.sql
|
||||
migration_file = migration_dir / "migration.sql"
|
||||
migration_file.write_text(result.stdout)
|
||||
|
||||
print(f"Created migration in {migration_dir}")
|
||||
return True
|
||||
else:
|
||||
print("No schema changes detected. Migration not needed.")
|
||||
return False
|
||||
|
||||
finally:
|
||||
# Clean up: remove temporary migrations directory
|
||||
if temp_migrations_dir.exists():
|
||||
shutil.rmtree(temp_migrations_dir)
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error generating migration: {e.stderr}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Error creating migration: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# If running directly, can optionally pass migration name as argument
|
||||
import sys
|
||||
|
||||
migration_name = sys.argv[1] if len(sys.argv) > 1 else None
|
||||
create_migration(migration_name)
|
@@ -1,32 +0,0 @@
|
||||
component_management:
|
||||
individual_components:
|
||||
- component_id: "Router"
|
||||
paths:
|
||||
- "router"
|
||||
- component_id: "LLMs"
|
||||
paths:
|
||||
- "*/llms/*"
|
||||
- component_id: "Caching"
|
||||
paths:
|
||||
- "*/caching/*"
|
||||
- ".*redis.*"
|
||||
- component_id: "litellm_logging"
|
||||
paths:
|
||||
- "*/integrations/*"
|
||||
- ".*litellm_logging.*"
|
||||
- component_id: "Proxy_Authentication"
|
||||
paths:
|
||||
- "*/proxy/auth/**"
|
||||
comment:
|
||||
layout: "header, diff, flags, components" # show component info in the PR comment
|
||||
|
||||
coverage:
|
||||
status:
|
||||
project:
|
||||
default:
|
||||
target: auto
|
||||
threshold: 1% # at maximum allow project coverage to drop by 1%
|
||||
patch:
|
||||
default:
|
||||
target: auto
|
||||
threshold: 0% # patch coverage should be 100%
|
File diff suppressed because one or more lines are too long
@@ -1,406 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "ZwuaylskLxFu",
|
||||
"outputId": "d684d6a3-32fe-4beb-c378-c39134bcf8cc"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Collecting litellm==0.1.363\n",
|
||||
" Downloading litellm-0.1.363-py3-none-any.whl (34 kB)\n",
|
||||
"Requirement already satisfied: openai<0.28.0,>=0.27.8 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.27.8)\n",
|
||||
"Requirement already satisfied: python-dotenv<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (1.0.0)\n",
|
||||
"Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from litellm==0.1.363) (0.4.0)\n",
|
||||
"Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (2.31.0)\n",
|
||||
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.65.0)\n",
|
||||
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.8.5)\n",
|
||||
"Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.363) (2022.10.31)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.2.0)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.26.16)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.363) (2023.7.22)\n",
|
||||
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (23.1.0)\n",
|
||||
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (6.0.4)\n",
|
||||
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (4.0.2)\n",
|
||||
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.9.2)\n",
|
||||
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.4.0)\n",
|
||||
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.363) (1.3.1)\n",
|
||||
"Installing collected packages: litellm\n",
|
||||
" Attempting uninstall: litellm\n",
|
||||
" Found existing installation: litellm 0.1.362\n",
|
||||
" Uninstalling litellm-0.1.362:\n",
|
||||
" Successfully uninstalled litellm-0.1.362\n",
|
||||
"Successfully installed litellm-0.1.363\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install litellm==\"0.1.363\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "W216G__XL19Q"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @title Import litellm & Set env variables\n",
|
||||
"import litellm\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = \" \" #@param"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "ff1lKwUMMLJj",
|
||||
"outputId": "bfddf6f8-36d4-45e5-92dc-349083fa41b8"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
" Result from claude-instant-1 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" The Los Angeles Dodgers won the 2020 World Series, defeating the Tampa Bay Rays 4-2. It was the Dodgers' first World Series title since 1988.\"}}], 'created': 1691536677.2676156, 'model': 'claude-instant-1', 'usage': {'prompt_tokens': 30, 'completion_tokens': 32, 'total_tokens': 62}}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" Result from claude-2 {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': ' The Los Angeles Dodgers won'}}], 'created': 1691536677.944753, 'model': 'claude-2', 'usage': {'prompt_tokens': 30, 'completion_tokens': 5, 'total_tokens': 35}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# @title Request Claude Instant-1 and Claude-2\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"result = litellm.completion('claude-instant-1', messages)\n",
|
||||
"print(\"\\n\\n Result from claude-instant-1\", result)\n",
|
||||
"result = litellm.completion('claude-2', messages, max_tokens=5, temperature=0.2)\n",
|
||||
"print(\"\\n\\n Result from claude-2\", result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "06hWKnNQMrV-",
|
||||
"outputId": "7fdec0eb-d4a9-4882-f9c4-987ff9a31114"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Here\n",
|
||||
"'s\n",
|
||||
" a\n",
|
||||
" quick\n",
|
||||
" overview\n",
|
||||
" of\n",
|
||||
" how\n",
|
||||
" a\n",
|
||||
" court\n",
|
||||
" case\n",
|
||||
" can\n",
|
||||
" reach\n",
|
||||
" the\n",
|
||||
" U\n",
|
||||
".\n",
|
||||
"S\n",
|
||||
".\n",
|
||||
" Supreme\n",
|
||||
" Court\n",
|
||||
":\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-\n",
|
||||
" The\n",
|
||||
" case\n",
|
||||
" must\n",
|
||||
" first\n",
|
||||
" be\n",
|
||||
" heard\n",
|
||||
" in\n",
|
||||
" a\n",
|
||||
" lower\n",
|
||||
" trial\n",
|
||||
" court\n",
|
||||
" (\n",
|
||||
"either\n",
|
||||
" a\n",
|
||||
" state\n",
|
||||
" court\n",
|
||||
" or\n",
|
||||
" federal\n",
|
||||
" district\n",
|
||||
" court\n",
|
||||
").\n",
|
||||
" The\n",
|
||||
" trial\n",
|
||||
" court\n",
|
||||
" makes\n",
|
||||
" initial\n",
|
||||
" r\n",
|
||||
"ulings\n",
|
||||
" and\n",
|
||||
" produces\n",
|
||||
" a\n",
|
||||
" record\n",
|
||||
" of\n",
|
||||
" the\n",
|
||||
" case\n",
|
||||
".\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-\n",
|
||||
" The\n",
|
||||
" losing\n",
|
||||
" party\n",
|
||||
" can\n",
|
||||
" appeal\n",
|
||||
" the\n",
|
||||
" decision\n",
|
||||
" to\n",
|
||||
" an\n",
|
||||
" appeals\n",
|
||||
" court\n",
|
||||
" (\n",
|
||||
"a\n",
|
||||
" state\n",
|
||||
" appeals\n",
|
||||
" court\n",
|
||||
" for\n",
|
||||
" state\n",
|
||||
" cases\n",
|
||||
",\n",
|
||||
" or\n",
|
||||
" a\n",
|
||||
" federal\n",
|
||||
" circuit\n",
|
||||
" court\n",
|
||||
" for\n",
|
||||
" federal\n",
|
||||
" cases\n",
|
||||
").\n",
|
||||
" The\n",
|
||||
" appeals\n",
|
||||
" court\n",
|
||||
" reviews\n",
|
||||
" the\n",
|
||||
" trial\n",
|
||||
" court\n",
|
||||
"'s\n",
|
||||
" r\n",
|
||||
"ulings\n",
|
||||
" and\n",
|
||||
" can\n",
|
||||
" affirm\n",
|
||||
",\n",
|
||||
" reverse\n",
|
||||
",\n",
|
||||
" or\n",
|
||||
" modify\n",
|
||||
" the\n",
|
||||
" decision\n",
|
||||
".\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-\n",
|
||||
" If\n",
|
||||
" a\n",
|
||||
" party\n",
|
||||
" is\n",
|
||||
" still\n",
|
||||
" unsat\n",
|
||||
"isf\n",
|
||||
"ied\n",
|
||||
" after\n",
|
||||
" the\n",
|
||||
" appeals\n",
|
||||
" court\n",
|
||||
" rules\n",
|
||||
",\n",
|
||||
" they\n",
|
||||
" can\n",
|
||||
" petition\n",
|
||||
" the\n",
|
||||
" Supreme\n",
|
||||
" Court\n",
|
||||
" to\n",
|
||||
" hear\n",
|
||||
" the\n",
|
||||
" case\n",
|
||||
" through\n",
|
||||
" a\n",
|
||||
" writ\n",
|
||||
" of\n",
|
||||
" cert\n",
|
||||
"ior\n",
|
||||
"ari\n",
|
||||
".\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-\n",
|
||||
" The\n",
|
||||
" Supreme\n",
|
||||
" Court\n",
|
||||
" gets\n",
|
||||
" thousands\n",
|
||||
" of\n",
|
||||
" cert\n",
|
||||
" petitions\n",
|
||||
" every\n",
|
||||
" year\n",
|
||||
" but\n",
|
||||
" usually\n",
|
||||
" only\n",
|
||||
" agrees\n",
|
||||
" to\n",
|
||||
" hear\n",
|
||||
" about\n",
|
||||
" 100\n",
|
||||
"-\n",
|
||||
"150\n",
|
||||
" of\n",
|
||||
" cases\n",
|
||||
" that\n",
|
||||
" have\n",
|
||||
" significant\n",
|
||||
" national\n",
|
||||
" importance\n",
|
||||
" or\n",
|
||||
" where\n",
|
||||
" lower\n",
|
||||
" courts\n",
|
||||
" disagree\n",
|
||||
" on\n",
|
||||
" federal\n",
|
||||
" law\n",
|
||||
".\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-\n",
|
||||
" If\n",
|
||||
" 4\n",
|
||||
" out\n",
|
||||
" of\n",
|
||||
" the\n",
|
||||
" 9\n",
|
||||
" Just\n",
|
||||
"ices\n",
|
||||
" vote\n",
|
||||
" to\n",
|
||||
" grant\n",
|
||||
" cert\n",
|
||||
" (\n",
|
||||
"agree\n",
|
||||
" to\n",
|
||||
" hear\n",
|
||||
" the\n",
|
||||
" case\n",
|
||||
"),\n",
|
||||
" it\n",
|
||||
" goes\n",
|
||||
" on\n",
|
||||
" the\n",
|
||||
" Supreme\n",
|
||||
" Court\n",
|
||||
"'s\n",
|
||||
" do\n",
|
||||
"cket\n",
|
||||
" for\n",
|
||||
" arguments\n",
|
||||
".\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-\n",
|
||||
" The\n",
|
||||
" Supreme\n",
|
||||
" Court\n",
|
||||
" then\n",
|
||||
" hears\n",
|
||||
" oral\n",
|
||||
" arguments\n",
|
||||
",\n",
|
||||
" considers\n",
|
||||
" written\n",
|
||||
" brief\n",
|
||||
"s\n",
|
||||
",\n",
|
||||
" examines\n",
|
||||
" the\n",
|
||||
" lower\n",
|
||||
" court\n",
|
||||
" records\n",
|
||||
",\n",
|
||||
" and\n",
|
||||
" issues\n",
|
||||
" a\n",
|
||||
" final\n",
|
||||
" ruling\n",
|
||||
" on\n",
|
||||
" the\n",
|
||||
" case\n",
|
||||
",\n",
|
||||
" which\n",
|
||||
" serves\n",
|
||||
" as\n",
|
||||
" binding\n",
|
||||
" precedent\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# @title Streaming Example: Request Claude-2\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"how does a court case get to the Supreme Court?\"}\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"result = litellm.completion('claude-2', messages, stream=True)\n",
|
||||
"for part in result:\n",
|
||||
" print(part.choices[0].delta.content or \"\")\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
File diff suppressed because one or more lines are too long
@@ -1,422 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "BmX0b5Ueh91v"
|
||||
},
|
||||
"source": [
|
||||
"# LiteLLM - Azure OpenAI + OpenAI Calls\n",
|
||||
"This notebook covers the following for Azure OpenAI + OpenAI:\n",
|
||||
"* Completion - Quick start\n",
|
||||
"* Completion - Streaming\n",
|
||||
"* Completion - Azure, OpenAI in separate threads\n",
|
||||
"* Completion - Stress Test 10 requests in parallel\n",
|
||||
"* Completion - Azure, OpenAI in the same thread"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "iHq4d0dpfawS"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "mnveHO5dfcB0"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "eo88QUdbiDIE"
|
||||
},
|
||||
"source": [
|
||||
"## Completion - Quick start"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "5OSosWNCfc_2",
|
||||
"outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Openai Response\n",
|
||||
"\n",
|
||||
"{\n",
|
||||
" \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1694708958,\n",
|
||||
" \"model\": \"gpt-3.5-turbo-0613\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 13,\n",
|
||||
" \"completion_tokens\": 26,\n",
|
||||
" \"total_tokens\": 39\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"Azure Response\n",
|
||||
"\n",
|
||||
"{\n",
|
||||
" \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1694708960,\n",
|
||||
" \"model\": \"gpt-35-turbo\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"completion_tokens\": 27,\n",
|
||||
" \"prompt_tokens\": 14,\n",
|
||||
" \"total_tokens\": 41\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"# openai configs\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
"# azure openai configs\n",
|
||||
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
|
||||
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# openai call\n",
|
||||
"response = completion(\n",
|
||||
" model = \"gpt-3.5-turbo\",\n",
|
||||
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
")\n",
|
||||
"print(\"Openai Response\\n\")\n",
|
||||
"print(response)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# azure call\n",
|
||||
"response = completion(\n",
|
||||
" model = \"azure/your-azure-deployment\",\n",
|
||||
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
")\n",
|
||||
"print(\"Azure Response\\n\")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "dQMkM-diiKdE"
|
||||
},
|
||||
"source": [
|
||||
"## Completion - Streaming"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "uVvJDVn4g1i1"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"# openai configs\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
"# azure openai configs\n",
|
||||
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
|
||||
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# openai call\n",
|
||||
"response = completion(\n",
|
||||
" model = \"gpt-3.5-turbo\",\n",
|
||||
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
|
||||
" stream=True\n",
|
||||
")\n",
|
||||
"print(\"OpenAI Streaming response\")\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk)\n",
|
||||
"\n",
|
||||
"# azure call\n",
|
||||
"response = completion(\n",
|
||||
" model = \"azure/your-azure-deployment\",\n",
|
||||
" messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
|
||||
" stream=True\n",
|
||||
")\n",
|
||||
"print(\"Azure Streaming response\")\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "4xrOPnt-oqwm"
|
||||
},
|
||||
"source": [
|
||||
"## Completion - Azure, OpenAI in separate threads"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "V5b5taJPjvC3"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import threading\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"# Function to make a completion call\n",
|
||||
"def make_completion(model, messages):\n",
|
||||
" response = completion(\n",
|
||||
" model=model,\n",
|
||||
" messages=messages\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(f\"Response for {model}: {response}\")\n",
|
||||
"\n",
|
||||
"# openai configs\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
"# azure openai configs\n",
|
||||
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
|
||||
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
|
||||
"\n",
|
||||
"# Define the messages for the completions\n",
|
||||
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
|
||||
"\n",
|
||||
"# Create threads for making the completions\n",
|
||||
"thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n",
|
||||
"thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n",
|
||||
"\n",
|
||||
"# Start both threads\n",
|
||||
"thread1.start()\n",
|
||||
"thread2.start()\n",
|
||||
"\n",
|
||||
"# Wait for both threads to finish\n",
|
||||
"thread1.join()\n",
|
||||
"thread2.join()\n",
|
||||
"\n",
|
||||
"print(\"Both completions are done.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "lx8DbMBqoAoN"
|
||||
},
|
||||
"source": [
|
||||
"## Completion - Stress Test 10 requests in parallel\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "pHYANOlOkoDh"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import threading\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"# Function to make a completion call\n",
|
||||
"def make_completion(model, messages):\n",
|
||||
" response = completion(\n",
|
||||
" model=model,\n",
|
||||
" messages=messages\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(f\"Response for {model}: {response}\")\n",
|
||||
"\n",
|
||||
"# Set your API keys\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"AZURE_API_KEY\"] = \"\"\n",
|
||||
"os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
|
||||
"os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
|
||||
"\n",
|
||||
"# Define the messages for the completions\n",
|
||||
"messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
|
||||
"\n",
|
||||
"# Create and start 10 threads for making completions\n",
|
||||
"threads = []\n",
|
||||
"for i in range(10):\n",
|
||||
" thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n",
|
||||
" threads.append(thread)\n",
|
||||
" thread.start()\n",
|
||||
"\n",
|
||||
"# Wait for all threads to finish\n",
|
||||
"for thread in threads:\n",
|
||||
" thread.join()\n",
|
||||
"\n",
|
||||
"print(\"All completions are done.\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "yB2NDOO4oxrp"
|
||||
},
|
||||
"source": [
|
||||
"## Completion - Azure, OpenAI in the same thread"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "HTBqwzxpnxab",
|
||||
"outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI Response: {\n",
|
||||
" \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1694710847,\n",
|
||||
" \"model\": \"gpt-3.5-turbo-0613\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 13,\n",
|
||||
" \"completion_tokens\": 29,\n",
|
||||
" \"total_tokens\": 42\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"Azure OpenAI Response: {\n",
|
||||
" \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1694710849,\n",
|
||||
" \"model\": \"gpt-35-turbo\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"completion_tokens\": 29,\n",
|
||||
" \"prompt_tokens\": 14,\n",
|
||||
" \"total_tokens\": 43\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"# Function to make both OpenAI and Azure completions\n",
|
||||
"def make_completions():\n",
|
||||
" # Set your OpenAI API key\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
" # OpenAI completion\n",
|
||||
" openai_response = completion(\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(\"OpenAI Response:\", openai_response)\n",
|
||||
"\n",
|
||||
" # Set your Azure OpenAI API key and configuration\n",
|
||||
" os.environ[\"AZURE_API_KEY\"] = \"\"\n",
|
||||
" os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
|
||||
" os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n",
|
||||
"\n",
|
||||
" # Azure OpenAI completion\n",
|
||||
" azure_response = completion(\n",
|
||||
" model=\"azure/your-azure-deployment\",\n",
|
||||
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(\"Azure OpenAI Response:\", azure_response)\n",
|
||||
"\n",
|
||||
"# Call the function to make both completions in one thread\n",
|
||||
"make_completions()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,310 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "fNkMBurtxawJ"
|
||||
},
|
||||
"source": [
|
||||
"# LiteLLM Bedrock Usage\n",
|
||||
"Important Note: For Bedrock Requests you need to ensure you have `pip install boto3>=1.28.57`, boto3 supports bedrock from `boto3>=1.28.57` and higher "
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "htAufI28xeSy"
|
||||
},
|
||||
"source": [
|
||||
"## Pre-Requisites"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "jT5GbPjAuDTp"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm\n",
|
||||
"!pip install boto3>=1.28.57 # this version onwards has bedrock support"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "H4Vu4er2xnfI"
|
||||
},
|
||||
"source": [
|
||||
"## Set Bedrock/AWS Credentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"id": "CtTrBthWxp-t"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.environ[\"AWS_ACCESS_KEY_ID\"] = \"\" # Access key\n",
|
||||
"os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"\" # Secret access key\n",
|
||||
"os.environ[\"AWS_REGION_NAME\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "ycRK9NUdx1EI"
|
||||
},
|
||||
"source": [
|
||||
"## Anthropic Requests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "tgkuoHa5uLOy",
|
||||
"outputId": "27a78e86-c6a7-4bcc-8559-0813cb978426"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Claude instant 1, response\n",
|
||||
"{\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" I'm doing well, thanks for asking!\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-4f2e64a1-56d2-43f2-90d3-60ffd6f5086d\",\n",
|
||||
" \"created\": 1696256761.3265705,\n",
|
||||
" \"model\": \"anthropic.claude-instant-v1\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 11,\n",
|
||||
" \"completion_tokens\": 9,\n",
|
||||
" \"total_tokens\": 20\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop_sequence\"\n",
|
||||
"}\n",
|
||||
"Claude v2, response\n",
|
||||
"{\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" I'm doing well, thanks for asking!\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-34f59b33-f94e-40c2-8bdb-f4af0813405e\",\n",
|
||||
" \"created\": 1696256762.2137017,\n",
|
||||
" \"model\": \"anthropic.claude-v2\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 11,\n",
|
||||
" \"completion_tokens\": 9,\n",
|
||||
" \"total_tokens\": 20\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop_sequence\"\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"bedrock/anthropic.claude-instant-v1\",\n",
|
||||
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
")\n",
|
||||
"print(\"Claude instant 1, response\")\n",
|
||||
"print(response)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"bedrock/anthropic.claude-v2\",\n",
|
||||
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
")\n",
|
||||
"print(\"Claude v2, response\")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "HnM-HtM3yFMT"
|
||||
},
|
||||
"source": [
|
||||
"## Anthropic Requests - With Streaming"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "_JZvg2yovRsU"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"bedrock/anthropic.claude-instant-v1\",\n",
|
||||
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
|
||||
" stream=True,\n",
|
||||
")\n",
|
||||
"print(\"Claude instant 1, response\")\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"bedrock/anthropic.claude-v2\",\n",
|
||||
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
|
||||
" stream=True\n",
|
||||
")\n",
|
||||
"print(\"Claude v2, response\")\n",
|
||||
"print(response)\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "zj1U1mh9zEhP"
|
||||
},
|
||||
"source": [
|
||||
"## A121 Requests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "6wK6MZLovU7r",
|
||||
"outputId": "4cf80c04-f15d-4066-b4c7-113b551538de"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"J2 ultra response\n",
|
||||
"{\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \"\\nHi, I'm doing well, thanks for asking! How about you?\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-f2de678f-0e70-4e36-a01f-8b184c2e4d50\",\n",
|
||||
" \"created\": 1696257116.044311,\n",
|
||||
" \"model\": \"ai21.j2-ultra\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 6,\n",
|
||||
" \"completion_tokens\": 16,\n",
|
||||
" \"total_tokens\": 22\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"J2 mid response\n",
|
||||
"{\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \"\\nGood. And you?\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-420d6bf9-36d8-484b-93b4-4c9e00f7ce2e\",\n",
|
||||
" \"created\": 1696257116.5756805,\n",
|
||||
" \"model\": \"ai21.j2-mid\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 6,\n",
|
||||
" \"completion_tokens\": 6,\n",
|
||||
" \"total_tokens\": 12\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = completion(\n",
|
||||
" model=\"bedrock/ai21.j2-ultra\",\n",
|
||||
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
|
||||
")\n",
|
||||
"print(\"J2 ultra response\")\n",
|
||||
"print(response)\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"bedrock/ai21.j2-mid\",\n",
|
||||
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
|
||||
")\n",
|
||||
"print(\"J2 mid response\")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Y5gGZIwzzSON"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
File diff suppressed because one or more lines are too long
@@ -1,241 +0,0 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Use LiteLLM to calculate costs for all your completion calls\n",
|
||||
"In this notebook we'll use `litellm.completion_cost` to get completion costs"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "BgWr0PsUR3vV"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "ViczFTjsDzSI"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm==0.1.549 # use 0.1.549 or later"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Calculating costs for gpt-3.5 turbo completion()"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "k_1CWUwmSNtj"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from litellm import completion, completion_cost\n",
|
||||
"import os\n",
|
||||
"os.environ['OPENAI_API_KEY'] = \"\"\n",
|
||||
"\n",
|
||||
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
"response = completion(\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
" messages=messages,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response)\n",
|
||||
"\n",
|
||||
"cost = completion_cost(completion_response=response)\n",
|
||||
"formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
|
||||
"print(formatted_string)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "Tp0fyk-jD0pP",
|
||||
"outputId": "ce885fb3-3237-41b2-9d8b-3fb30bba498b"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"got response\n",
|
||||
"{\n",
|
||||
" \"id\": \"chatcmpl-7vyCApIZaCxP36kb9meUMN2DFSJPh\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1694050442,\n",
|
||||
" \"model\": \"gpt-3.5-turbo-0613\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"Hello! I'm an AI and I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 13,\n",
|
||||
" \"completion_tokens\": 28,\n",
|
||||
" \"total_tokens\": 41\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"Cost for completion call: $0.0000755000\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Calculating costs for Together Computer completion()"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "AjDs4G-uS6PS"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from litellm import completion, completion_cost\n",
|
||||
"import os\n",
|
||||
"os.environ['TOGETHERAI_API_KEY'] = \"\"\n",
|
||||
"\n",
|
||||
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
"response = completion(\n",
|
||||
" model=\"togethercomputer/llama-2-70b-chat\",\n",
|
||||
" messages=messages,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response)\n",
|
||||
"\n",
|
||||
"cost = completion_cost(completion_response=response)\n",
|
||||
"formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
|
||||
"print(formatted_string)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "jMPsUV-KEa6a",
|
||||
"outputId": "7a69b291-f149-4b9c-8a78-9c8142bac759"
|
||||
},
|
||||
"execution_count": 7,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \"Hello! I'm doing well, thanks for asking. I hope you're having a great\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1694050771.2821715,\n",
|
||||
" \"model\": \"togethercomputer/llama-2-70b-chat\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 12,\n",
|
||||
" \"completion_tokens\": 18,\n",
|
||||
" \"total_tokens\": 30\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"Cost for completion call: $0.0000900000\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Calculating costs for Replicate Llama2 completion()"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "vEa4s6-7TANS"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from litellm import completion, completion_cost\n",
|
||||
"import os\n",
|
||||
"os.environ['REPLICATE_API_KEY'] = \"\"\n",
|
||||
"\n",
|
||||
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
"response = completion(\n",
|
||||
" model=\"replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf\",\n",
|
||||
" messages=messages,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response)\n",
|
||||
"\n",
|
||||
"cost = completion_cost(completion_response=response)\n",
|
||||
"formatted_string = f\"Cost for completion call: ${float(cost):.10f}\"\n",
|
||||
"print(formatted_string)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "Xf1TKRDuS1bR",
|
||||
"outputId": "cfb2b484-a6e5-41ad-86c5-7e66aba27648"
|
||||
},
|
||||
"execution_count": 8,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" Hello! I'm doing well, thanks for asking. How about you? Is there anything you need help with today?\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1694050893.4534576,\n",
|
||||
" \"model\": \"replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 6,\n",
|
||||
" \"completion_tokens\": 24,\n",
|
||||
" \"total_tokens\": 30\n",
|
||||
" },\n",
|
||||
" \"ended\": 1694050896.6689413\n",
|
||||
"}\n",
|
||||
"total_replicate_run_time 3.2154836654663086\n",
|
||||
"Cost for completion call: $0.0045016771\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@@ -1,252 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "9dKM5k8qsMIj"
|
||||
},
|
||||
"source": [
|
||||
"## LiteLLM Hugging Face\n",
|
||||
"\n",
|
||||
"Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "BVDdmCp-o97j"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "yp5UXRqtpu9f"
|
||||
},
|
||||
"source": [
|
||||
"## Serverless Inference Providers\n",
|
||||
"\n",
|
||||
"Read more about Inference Providers here: https://huggingface.co/blog/inference-providers.\n",
|
||||
"\n",
|
||||
"In order to use litellm with Hugging Face Inference Providers, you need to set `model=huggingface/<provider>/<model-id>`.\n",
|
||||
"\n",
|
||||
"Example: `huggingface/together/deepseek-ai/DeepSeek-R1` to run DeepSeek-R1 (https://huggingface.co/deepseek-ai/DeepSeek-R1) through Together AI.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "Pi5Oww8gpCUm",
|
||||
"outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"# You can create a HF token here: https://huggingface.co/settings/tokens\n",
|
||||
"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
|
||||
"\n",
|
||||
"# Call DeepSeek-R1 model through Together AI\n",
|
||||
"response = completion(\n",
|
||||
" model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n",
|
||||
" messages=[{\"content\": \"How many r's are in the word `strawberry`?\", \"role\": \"user\"}],\n",
|
||||
")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "EU0UubrKzTFe"
|
||||
},
|
||||
"source": [
|
||||
"## Streaming\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "y-QfIvA-uJKX",
|
||||
"outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n",
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"How many r's are in the word `strawberry`?\",\n",
|
||||
" \n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" stream=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## With images as input\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"# Set your Hugging Face Token\n",
|
||||
"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\"type\": \"text\", \"text\": \"What's in this image?\"},\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\n",
|
||||
" \"url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" ],\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct\",\n",
|
||||
" messages=messages,\n",
|
||||
")\n",
|
||||
"print(response.choices[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools - Function Calling\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Set your Hugging Face Token\n",
|
||||
"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
|
||||
"\n",
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"],\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"huggingface/sambanova/meta-llama/Llama-3.1-8B-Instruct\", messages=messages, tools=tools, tool_choice=\"auto\"\n",
|
||||
")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Hugging Face Dedicated Inference Endpoints\n",
|
||||
"\n",
|
||||
"Steps to use\n",
|
||||
"\n",
|
||||
"- Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n",
|
||||
"- Set `api_base` to your deployed api base\n",
|
||||
"- set the model to `huggingface/tgi` so that litellm knows it's a huggingface Deployed Inference Endpoint.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import litellm\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"response = litellm.completion(\n",
|
||||
" model=\"huggingface/tgi\",\n",
|
||||
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}],\n",
|
||||
" api_base=\"https://my-endpoint.endpoints.huggingface.cloud/v1/\",\n",
|
||||
")\n",
|
||||
"print(response)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,97 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "iFEmsVJI_2BR"
|
||||
},
|
||||
"source": [
|
||||
"# LiteLLM NovitaAI Cookbook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "cBlUhCEP_xj4"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "p-MQqWOT_1a7"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ['NOVITA_API_KEY'] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Ze8JqMqWAARO"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"response = completion(\n",
|
||||
" model=\"novita/deepseek/deepseek-r1\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
|
||||
")\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "-LnhELrnAM_J"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = completion(\n",
|
||||
" model=\"novita/deepseek/deepseek-r1\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
|
||||
")\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "dJBOUYdwCEn1"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = completion(\n",
|
||||
" model=\"mistralai/mistral-7b-instruct\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
|
||||
")\n",
|
||||
"response"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,179 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "iFEmsVJI_2BR"
|
||||
},
|
||||
"source": [
|
||||
"# LiteLLM OpenRouter Cookbook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "cBlUhCEP_xj4"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"id": "p-MQqWOT_1a7"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ['OPENROUTER_API_KEY'] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "Ze8JqMqWAARO",
|
||||
"outputId": "64f3e836-69fa-4f8e-fb35-088a913bbe98"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<OpenAIObject id=gen-W8FTMSIEorCp3vG5iYIgNMR4IeBv at 0x7c3dcef1f060> JSON: {\n",
|
||||
" \"id\": \"gen-W8FTMSIEorCp3vG5iYIgNMR4IeBv\",\n",
|
||||
" \"model\": \"chat-bison@001\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"```\\n#include <stdio.h>\\n\\nint main() {\\n printf(\\\"Hi!\\\\n\\\");\\n return 0;\\n}\\n```\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"response_ms\": 7817.777999999999\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"response = completion(\n",
|
||||
" model=\"openrouter/google/palm-2-chat-bison\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
|
||||
")\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "-LnhELrnAM_J",
|
||||
"outputId": "d51c7ab7-d761-4bd1-f849-1534d9df4cd0"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<OpenAIObject id=gen-IiuV7ZNimDufVeutBHrl8ajPuzEh at 0x7c3dcea67560> JSON: {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \" Here is some simple code to print \\\"Hi\\\":\\n\\n```python\\nprint(\\\"Hi\\\")\\n```\\n\\nThis uses the print() function in Python to output the text \\\"Hi\\\".\"\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop_sequence\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"model\": \"claude-2.0\",\n",
|
||||
" \"id\": \"gen-IiuV7ZNimDufVeutBHrl8ajPuzEh\",\n",
|
||||
" \"response_ms\": 8112.443000000001\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = completion(\n",
|
||||
" model=\"openrouter/anthropic/claude-2\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
|
||||
")\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "dJBOUYdwCEn1",
|
||||
"outputId": "ffa18679-ec15-4dad-fe2b-68665cdf36b0"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<OpenAIObject id=gen-PyMd3yyJ0aQsCgIY9R8XGZoAtPbl at 0x7c3dceefcae0> JSON: {\n",
|
||||
" \"id\": \"gen-PyMd3yyJ0aQsCgIY9R8XGZoAtPbl\",\n",
|
||||
" \"model\": \"togethercomputer/llama-2-70b-chat\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"*gives a sly smile as they type*\\n\\nHey there, handsome. \\ud83d\\ude0f\\n\\nWhat brings you to my neck of the woods today? \\ud83d\\ude18\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"response_ms\": 9618.775\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = completion(\n",
|
||||
" model=\"openrouter/meta-llama/llama-2-70b-chat\",\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": \"write code for saying hi\"}]\n",
|
||||
")\n",
|
||||
"response"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,568 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "dwGtLi_tvM6N"
|
||||
},
|
||||
"source": [
|
||||
"# Using LiteLLM with Petals"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "bdlgaWQqDpzj"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm # 0.1.715 and upwards"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "5Id2QKwOEH8X"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install petals\n",
|
||||
"!pip install git+https://github.com/bigscience-workshop/petals"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "k42fldw3veSN"
|
||||
},
|
||||
"source": [
|
||||
"## petals-team/StableBeluga2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "tIHcEHdSDqju",
|
||||
"outputId": "485dbf54-395c-433a-bbf4-8eb70a9fa624"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
|
||||
"Sep 19 18:39:50.634 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n",
|
||||
"Sep 19 18:39:50.639 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: StableBeluga2-hf\n",
|
||||
"Sep 19 18:40:13.920 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:40 via …HfQWVM => 40:80 via …Zj98Se\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \"Hello, how are you?\\nI'm doing well, thank you. I'm just getting ready to go to the gym.\\nOh, that's great. I'm trying to get back into a workout routine myself.\\nYeah,\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-f09d79b3-c1d1-49b7-b55f-cd8dfa1043bf\",\n",
|
||||
" \"created\": 1695148897.473613,\n",
|
||||
" \"model\": \"petals-team/StableBeluga2\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 6,\n",
|
||||
" \"completion_tokens\": 45,\n",
|
||||
" \"total_tokens\": 51\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"response = completion(model=\"petals/petals-team/StableBeluga2\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], max_tokens=50)\n",
|
||||
"\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "J8DubRnHvh_j"
|
||||
},
|
||||
"source": [
|
||||
"## huggyllama/llama-65b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 538,
|
||||
"referenced_widgets": [
|
||||
"2fec5cc400424671a3d517327117d18a",
|
||||
"3687c76fe84d464baaf35366b21e83b3",
|
||||
"c29d4460dbaa441cae110b58e0014151",
|
||||
"6560449a38bf4a7bacd97ccaacf01c4c",
|
||||
"5fbd6ae281984d28ba59ebfd0279eda7",
|
||||
"323e30e275434aeea241163e5f1f9031",
|
||||
"48f4adec51c94f9da6e4c4564daeff84",
|
||||
"2a672981a44b4a7fb30674f97f4c10c6",
|
||||
"d75ae8d22ea74840b4c80c8f386384c4",
|
||||
"54c06312ecff4e7588665e8b0cb7118b",
|
||||
"300078a9d1a6483fba81a4be63793ff7"
|
||||
]
|
||||
},
|
||||
"id": "IlTCJwDsNvgF",
|
||||
"outputId": "2e84d125-d982-48ed-8a92-6ca438a50d0c"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Sep 19 18:41:37.912 [\u001b[1m\u001b[34mINFO\u001b[0m] Make sure you follow the LLaMA's terms of use: https://bit.ly/llama2-license for LLaMA 2, https://bit.ly/llama-license for LLaMA 1\n",
|
||||
"Sep 19 18:41:37.914 [\u001b[1m\u001b[34mINFO\u001b[0m] Using DHT prefix: llama-65b-hf\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "2fec5cc400424671a3d517327117d18a",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:362: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.2` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
|
||||
" warnings.warn(\n",
|
||||
"Sep 19 18:41:48.396 [\u001b[1m\u001b[34mINFO\u001b[0m] Route found: 0:80 via …g634yJ\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \"Hello, how are you?\\nI'm fine, thank you. And\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-3496e6eb-2a27-4f94-8d75-70648eacd88f\",\n",
|
||||
" \"created\": 1695148912.9116046,\n",
|
||||
" \"model\": \"huggyllama/llama-65b\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 6,\n",
|
||||
" \"completion_tokens\": 14,\n",
|
||||
" \"total_tokens\": 20\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = completion(model=\"petals/huggyllama/llama-65b\", messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}], temperature=0.2, max_tokens=10)\n",
|
||||
"\n",
|
||||
"print(response)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"gpuType": "T4",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"widgets": {
|
||||
"application/vnd.jupyter.widget-state+json": {
|
||||
"2a672981a44b4a7fb30674f97f4c10c6": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_module_version": "1.2.0",
|
||||
"model_name": "LayoutModel",
|
||||
"state": {
|
||||
"_model_module": "@jupyter-widgets/base",
|
||||
"_model_module_version": "1.2.0",
|
||||
"_model_name": "LayoutModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/base",
|
||||
"_view_module_version": "1.2.0",
|
||||
"_view_name": "LayoutView",
|
||||
"align_content": null,
|
||||
"align_items": null,
|
||||
"align_self": null,
|
||||
"border": null,
|
||||
"bottom": null,
|
||||
"display": null,
|
||||
"flex": null,
|
||||
"flex_flow": null,
|
||||
"grid_area": null,
|
||||
"grid_auto_columns": null,
|
||||
"grid_auto_flow": null,
|
||||
"grid_auto_rows": null,
|
||||
"grid_column": null,
|
||||
"grid_gap": null,
|
||||
"grid_row": null,
|
||||
"grid_template_areas": null,
|
||||
"grid_template_columns": null,
|
||||
"grid_template_rows": null,
|
||||
"height": null,
|
||||
"justify_content": null,
|
||||
"justify_items": null,
|
||||
"left": null,
|
||||
"margin": null,
|
||||
"max_height": null,
|
||||
"max_width": null,
|
||||
"min_height": null,
|
||||
"min_width": null,
|
||||
"object_fit": null,
|
||||
"object_position": null,
|
||||
"order": null,
|
||||
"overflow": null,
|
||||
"overflow_x": null,
|
||||
"overflow_y": null,
|
||||
"padding": null,
|
||||
"right": null,
|
||||
"top": null,
|
||||
"visibility": null,
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"2fec5cc400424671a3d517327117d18a": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_module_version": "1.5.0",
|
||||
"model_name": "HBoxModel",
|
||||
"state": {
|
||||
"_dom_classes": [],
|
||||
"_model_module": "@jupyter-widgets/controls",
|
||||
"_model_module_version": "1.5.0",
|
||||
"_model_name": "HBoxModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/controls",
|
||||
"_view_module_version": "1.5.0",
|
||||
"_view_name": "HBoxView",
|
||||
"box_style": "",
|
||||
"children": [
|
||||
"IPY_MODEL_3687c76fe84d464baaf35366b21e83b3",
|
||||
"IPY_MODEL_c29d4460dbaa441cae110b58e0014151",
|
||||
"IPY_MODEL_6560449a38bf4a7bacd97ccaacf01c4c"
|
||||
],
|
||||
"layout": "IPY_MODEL_5fbd6ae281984d28ba59ebfd0279eda7"
|
||||
}
|
||||
},
|
||||
"300078a9d1a6483fba81a4be63793ff7": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_module_version": "1.5.0",
|
||||
"model_name": "DescriptionStyleModel",
|
||||
"state": {
|
||||
"_model_module": "@jupyter-widgets/controls",
|
||||
"_model_module_version": "1.5.0",
|
||||
"_model_name": "DescriptionStyleModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/base",
|
||||
"_view_module_version": "1.2.0",
|
||||
"_view_name": "StyleView",
|
||||
"description_width": ""
|
||||
}
|
||||
},
|
||||
"323e30e275434aeea241163e5f1f9031": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_module_version": "1.2.0",
|
||||
"model_name": "LayoutModel",
|
||||
"state": {
|
||||
"_model_module": "@jupyter-widgets/base",
|
||||
"_model_module_version": "1.2.0",
|
||||
"_model_name": "LayoutModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/base",
|
||||
"_view_module_version": "1.2.0",
|
||||
"_view_name": "LayoutView",
|
||||
"align_content": null,
|
||||
"align_items": null,
|
||||
"align_self": null,
|
||||
"border": null,
|
||||
"bottom": null,
|
||||
"display": null,
|
||||
"flex": null,
|
||||
"flex_flow": null,
|
||||
"grid_area": null,
|
||||
"grid_auto_columns": null,
|
||||
"grid_auto_flow": null,
|
||||
"grid_auto_rows": null,
|
||||
"grid_column": null,
|
||||
"grid_gap": null,
|
||||
"grid_row": null,
|
||||
"grid_template_areas": null,
|
||||
"grid_template_columns": null,
|
||||
"grid_template_rows": null,
|
||||
"height": null,
|
||||
"justify_content": null,
|
||||
"justify_items": null,
|
||||
"left": null,
|
||||
"margin": null,
|
||||
"max_height": null,
|
||||
"max_width": null,
|
||||
"min_height": null,
|
||||
"min_width": null,
|
||||
"object_fit": null,
|
||||
"object_position": null,
|
||||
"order": null,
|
||||
"overflow": null,
|
||||
"overflow_x": null,
|
||||
"overflow_y": null,
|
||||
"padding": null,
|
||||
"right": null,
|
||||
"top": null,
|
||||
"visibility": null,
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"3687c76fe84d464baaf35366b21e83b3": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_module_version": "1.5.0",
|
||||
"model_name": "HTMLModel",
|
||||
"state": {
|
||||
"_dom_classes": [],
|
||||
"_model_module": "@jupyter-widgets/controls",
|
||||
"_model_module_version": "1.5.0",
|
||||
"_model_name": "HTMLModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/controls",
|
||||
"_view_module_version": "1.5.0",
|
||||
"_view_name": "HTMLView",
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_323e30e275434aeea241163e5f1f9031",
|
||||
"placeholder": "",
|
||||
"style": "IPY_MODEL_48f4adec51c94f9da6e4c4564daeff84",
|
||||
"value": "Loading checkpoint shards: 100%"
|
||||
}
|
||||
},
|
||||
"48f4adec51c94f9da6e4c4564daeff84": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_module_version": "1.5.0",
|
||||
"model_name": "DescriptionStyleModel",
|
||||
"state": {
|
||||
"_model_module": "@jupyter-widgets/controls",
|
||||
"_model_module_version": "1.5.0",
|
||||
"_model_name": "DescriptionStyleModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/base",
|
||||
"_view_module_version": "1.2.0",
|
||||
"_view_name": "StyleView",
|
||||
"description_width": ""
|
||||
}
|
||||
},
|
||||
"54c06312ecff4e7588665e8b0cb7118b": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_module_version": "1.2.0",
|
||||
"model_name": "LayoutModel",
|
||||
"state": {
|
||||
"_model_module": "@jupyter-widgets/base",
|
||||
"_model_module_version": "1.2.0",
|
||||
"_model_name": "LayoutModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/base",
|
||||
"_view_module_version": "1.2.0",
|
||||
"_view_name": "LayoutView",
|
||||
"align_content": null,
|
||||
"align_items": null,
|
||||
"align_self": null,
|
||||
"border": null,
|
||||
"bottom": null,
|
||||
"display": null,
|
||||
"flex": null,
|
||||
"flex_flow": null,
|
||||
"grid_area": null,
|
||||
"grid_auto_columns": null,
|
||||
"grid_auto_flow": null,
|
||||
"grid_auto_rows": null,
|
||||
"grid_column": null,
|
||||
"grid_gap": null,
|
||||
"grid_row": null,
|
||||
"grid_template_areas": null,
|
||||
"grid_template_columns": null,
|
||||
"grid_template_rows": null,
|
||||
"height": null,
|
||||
"justify_content": null,
|
||||
"justify_items": null,
|
||||
"left": null,
|
||||
"margin": null,
|
||||
"max_height": null,
|
||||
"max_width": null,
|
||||
"min_height": null,
|
||||
"min_width": null,
|
||||
"object_fit": null,
|
||||
"object_position": null,
|
||||
"order": null,
|
||||
"overflow": null,
|
||||
"overflow_x": null,
|
||||
"overflow_y": null,
|
||||
"padding": null,
|
||||
"right": null,
|
||||
"top": null,
|
||||
"visibility": null,
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"5fbd6ae281984d28ba59ebfd0279eda7": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_module_version": "1.2.0",
|
||||
"model_name": "LayoutModel",
|
||||
"state": {
|
||||
"_model_module": "@jupyter-widgets/base",
|
||||
"_model_module_version": "1.2.0",
|
||||
"_model_name": "LayoutModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/base",
|
||||
"_view_module_version": "1.2.0",
|
||||
"_view_name": "LayoutView",
|
||||
"align_content": null,
|
||||
"align_items": null,
|
||||
"align_self": null,
|
||||
"border": null,
|
||||
"bottom": null,
|
||||
"display": null,
|
||||
"flex": null,
|
||||
"flex_flow": null,
|
||||
"grid_area": null,
|
||||
"grid_auto_columns": null,
|
||||
"grid_auto_flow": null,
|
||||
"grid_auto_rows": null,
|
||||
"grid_column": null,
|
||||
"grid_gap": null,
|
||||
"grid_row": null,
|
||||
"grid_template_areas": null,
|
||||
"grid_template_columns": null,
|
||||
"grid_template_rows": null,
|
||||
"height": null,
|
||||
"justify_content": null,
|
||||
"justify_items": null,
|
||||
"left": null,
|
||||
"margin": null,
|
||||
"max_height": null,
|
||||
"max_width": null,
|
||||
"min_height": null,
|
||||
"min_width": null,
|
||||
"object_fit": null,
|
||||
"object_position": null,
|
||||
"order": null,
|
||||
"overflow": null,
|
||||
"overflow_x": null,
|
||||
"overflow_y": null,
|
||||
"padding": null,
|
||||
"right": null,
|
||||
"top": null,
|
||||
"visibility": null,
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"6560449a38bf4a7bacd97ccaacf01c4c": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_module_version": "1.5.0",
|
||||
"model_name": "HTMLModel",
|
||||
"state": {
|
||||
"_dom_classes": [],
|
||||
"_model_module": "@jupyter-widgets/controls",
|
||||
"_model_module_version": "1.5.0",
|
||||
"_model_name": "HTMLModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/controls",
|
||||
"_view_module_version": "1.5.0",
|
||||
"_view_name": "HTMLView",
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_54c06312ecff4e7588665e8b0cb7118b",
|
||||
"placeholder": "",
|
||||
"style": "IPY_MODEL_300078a9d1a6483fba81a4be63793ff7",
|
||||
"value": " 2/2 [00:00<00:00, 2.36it/s]"
|
||||
}
|
||||
},
|
||||
"c29d4460dbaa441cae110b58e0014151": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_module_version": "1.5.0",
|
||||
"model_name": "FloatProgressModel",
|
||||
"state": {
|
||||
"_dom_classes": [],
|
||||
"_model_module": "@jupyter-widgets/controls",
|
||||
"_model_module_version": "1.5.0",
|
||||
"_model_name": "FloatProgressModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/controls",
|
||||
"_view_module_version": "1.5.0",
|
||||
"_view_name": "ProgressView",
|
||||
"bar_style": "success",
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_2a672981a44b4a7fb30674f97f4c10c6",
|
||||
"max": 2,
|
||||
"min": 0,
|
||||
"orientation": "horizontal",
|
||||
"style": "IPY_MODEL_d75ae8d22ea74840b4c80c8f386384c4",
|
||||
"value": 2
|
||||
}
|
||||
},
|
||||
"d75ae8d22ea74840b4c80c8f386384c4": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_module_version": "1.5.0",
|
||||
"model_name": "ProgressStyleModel",
|
||||
"state": {
|
||||
"_model_module": "@jupyter-widgets/controls",
|
||||
"_model_module_version": "1.5.0",
|
||||
"_model_name": "ProgressStyleModel",
|
||||
"_view_count": null,
|
||||
"_view_module": "@jupyter-widgets/base",
|
||||
"_view_module_version": "1.2.0",
|
||||
"_view_name": "StyleView",
|
||||
"bar_color": null,
|
||||
"description_width": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
File diff suppressed because one or more lines are too long
@@ -1,224 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "JRCXfhACct4Y"
|
||||
},
|
||||
"source": [
|
||||
"## User Based Rate Limiting Using LiteLLM\n",
|
||||
"- LiteLLM allows you to set budgets per user\n",
|
||||
"- Check if a given user has cross their allocated budget\n",
|
||||
"\n",
|
||||
"In this notebook we create a $0.0002 daily budget per user and make completion calls using the litellm budget manager"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "fl1kcLG8aaIV"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm uuid"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "zqRrpoQ3c6oQ"
|
||||
},
|
||||
"source": [
|
||||
"## Imports & Env variables"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"id": "CSkz8bmwdD3w"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"import os\n",
|
||||
"os.environ['OPENAI_API_KEY'] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "ktqe3gSmdFQ4"
|
||||
},
|
||||
"source": [
|
||||
"## completion() with the budget manager\n",
|
||||
"\n",
|
||||
"This code does the following\n",
|
||||
"- Initializes a litellm.BudgetManager()\n",
|
||||
"- Checks if a budget exists for a user\n",
|
||||
" - Creates a $0.0002 budget if the user does not exisr\n",
|
||||
"- Makes a `litellm.completion()` request only if the user is under their budget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "pUN48YvmaiRU",
|
||||
"outputId": "082d6a8b-9aef-4794-9eac-7ba9823ea373"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"No budget exists for user: 29af95f8-c3c6-4c8c-b080-8b2d18d25432\n",
|
||||
"\n",
|
||||
"Creating a budget for user: 29af95f8-c3c6-4c8c-b080-8b2d18d25432, daily budget $0.0002\n",
|
||||
"\n",
|
||||
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $0, budget for user: $0.0002\n",
|
||||
"\n",
|
||||
"{\n",
|
||||
" \"id\": \"chatcmpl-7yAUkHQV8xdfldzzZnnnuVU8pl31b\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1694574378,\n",
|
||||
" \"model\": \"gpt-3.5-turbo-0613\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"Hello! I'm an AI, so I don't have emotions, but I'm here to assist you. How can I help you today?\"\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 14,\n",
|
||||
" \"completion_tokens\": 29,\n",
|
||||
" \"total_tokens\": 43\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'status': 'success'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import BudgetManager, completion\n",
|
||||
"\n",
|
||||
"# Initializes a litellm.BudgetManager()\n",
|
||||
"budget_manager = BudgetManager(project_name=\"liteLLM_project\", client_type=\"hosted\") # see https://docs.litellm.ai/docs/budget_manager\n",
|
||||
"\n",
|
||||
"user_id = str(uuid.uuid4()) # create a new user id\n",
|
||||
"daily_budget = 0.0002\n",
|
||||
"\n",
|
||||
"# Checks if a budget exists for a user\n",
|
||||
"if not budget_manager.is_valid_user(user_id):\n",
|
||||
" # Creates a $0.0002 budget if the user does not exisr\n",
|
||||
" print(f\"No budget exists for user: {user_id}\\n\")\n",
|
||||
" print(f\"Creating a budget for user: {user_id}, daily budget ${daily_budget}\\n\")\n",
|
||||
" budget_manager.create_budget(total_budget=daily_budget, user=user_id, duration=\"daily\") # duration can be daily, weekly, monthly\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Makes a `litellm.completion()` request only if the user is under their budget\n",
|
||||
"current_spend_for_user = budget_manager.get_current_cost(user=user_id)\n",
|
||||
"budget_for_user = budget_manager.get_total_budget(user_id)\n",
|
||||
"print(f\"User: {user_id} has spent ${current_spend_for_user}, budget for user: ${budget_for_user}\\n\")\n",
|
||||
"\n",
|
||||
"if current_spend_for_user <= budget_for_user:\n",
|
||||
" response = completion(model=\"gpt-3.5-turbo\", messages=[{\"role\": \"user\", \"content\": \"Hey, how's it going?\"}])\n",
|
||||
" budget_manager.update_cost(completion_obj=response, user=user_id)\n",
|
||||
"else:\n",
|
||||
" response = \"Sorry - no budget!\"\n",
|
||||
"\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "yMOirNoBfmmc"
|
||||
},
|
||||
"source": [
|
||||
"## Make 10 calls to cross the budget per user\n",
|
||||
"- Code fails after user crossed their budget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "apKF3H-xbFXc",
|
||||
"outputId": "1c6ef0fe-e27e-4ead-adc6-2c7eb0214e44"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $7.9e-05, budget for user: $0.0002\n",
|
||||
"\n",
|
||||
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $0.00015999999999999999, budget for user: $0.0002\n",
|
||||
"\n",
|
||||
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has spent $0.00023899999999999998, budget for user: $0.0002\n",
|
||||
"\n",
|
||||
"User: 29af95f8-c3c6-4c8c-b080-8b2d18d25432 has exceeded budget, current spend $0.00023899999999999998, budget for user: $0.0002\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"user_id = \"29af95f8-c3c6-4c8c-b080-8b2d18d25432\" # set in the previous cell\n",
|
||||
"\n",
|
||||
"for _ in range(10):\n",
|
||||
" # check if a given call can be made\n",
|
||||
" current_spend_for_user = budget_manager.get_current_cost(user=user_id)\n",
|
||||
" budget_for_user = budget_manager.get_total_budget(user_id)\n",
|
||||
" print(f\"User: {user_id} has spent ${current_spend_for_user}, budget for user: ${budget_for_user}\\n\")\n",
|
||||
" if current_spend_for_user <= budget_for_user:\n",
|
||||
" response = completion(model=\"gpt-3.5-turbo\", messages=[{\"role\": \"user\", \"content\": \"Hey, how's it going?\"}])\n",
|
||||
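"        # update_cost records this call's spend against the user's budget, so the next loop iteration sees the updated total\n",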
" budget_manager.update_cost(completion_obj=response, user=user_id)\n",
|
||||
" else:\n",
|
||||
" response = \"Sorry - no budget!\"\n",
|
||||
" print(f\"User: {user_id} has exceeded budget, current spend ${current_spend_for_user}, budget for user: ${budget_for_user}\\n\")\n",
|
||||
" break # no more requests\n",
|
||||
"\n",
|
||||
" # print(response)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,163 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "MbLbs1tbISk-"
|
||||
},
|
||||
"source": [
|
||||
"# LiteLLM Batch Completions Example\n",
|
||||
"\n",
|
||||
"* This tutorial walks through using `batch_completion`\n",
|
||||
"* Docs: https://docs.litellm.ai/docs/completion/batching"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Ty6-ko_aDlPF"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "KGhNJRUCIh1j"
|
||||
},
|
||||
"source": [
|
||||
"## Import Batch Completion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"id": "LOtI43snDrSK"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import batch_completion\n",
|
||||
"\n",
|
||||
"# set your API_KEY\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Xhv92NBaIpaw"
|
||||
},
|
||||
"source": [
|
||||
"## Calling `litellm.batch_completion`\n",
|
||||
"\n",
|
||||
"In the batch_completion method, you provide a list of messages where each sub-list of messages is passed to litellm.completion(), allowing you to process multiple prompts efficiently in a single API call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "yY7GIRLsDywu",
|
||||
"outputId": "009ea67f-95d5-462b-947f-b0d21e60c5bb"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[<ModelResponse at 0x7a164eed4450> JSON: {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" Good morning!\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1694030351.309254,\n",
|
||||
" \"model\": \"claude-2\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 11,\n",
|
||||
" \"completion_tokens\": 3,\n",
|
||||
" \"total_tokens\": 14\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" <ModelResponse at 0x7a164eed5800> JSON: {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" I'm an AI assistant created by Anthropic. I don't actually have a concept of the current time.\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1694030352.1215081,\n",
|
||||
" \"model\": \"claude-2\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 13,\n",
|
||||
" \"completion_tokens\": 22,\n",
|
||||
" \"total_tokens\": 35\n",
|
||||
" }\n",
|
||||
" }]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
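"# each inner list of messages below is sent as its own completion request to the same model (see the batch_completion docs linked above)\n",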
"responses = batch_completion(\n",
|
||||
" model=\"claude-2\",\n",
|
||||
" messages = [\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"good morning? \"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"what's the time? \"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"responses"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,565 +0,0 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Migrating to LiteLLM Proxy from OpenAI/Azure OpenAI\n",
|
||||
"\n",
|
||||
"Covers:\n",
|
||||
"\n",
|
||||
"* /chat/completion\n",
|
||||
"* /embedding\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"These are **selected examples**. LiteLLM Proxy is **OpenAI-Compatible**, it works with any project that calls OpenAI. Just change the `base_url`, `api_key` and `model`.\n",
|
||||
"\n",
|
||||
"For more examples, [go here](https://docs.litellm.ai/docs/proxy/user_keys)\n",
|
||||
"\n",
|
||||
"To pass provider-specific args, [go here](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)\n",
|
||||
"\n",
|
||||
"To drop unsupported params (E.g. frequency_penalty for bedrock with librechat), [go here](https://docs.litellm.ai/docs/completion/drop_params#openai-proxy-usage)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "kccfk0mHZ4Ad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## /chat/completion\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "nmSClzCPaGH6"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### OpenAI Python SDK"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "_vqcjwOVaKpO"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "x1e_Ok3KZzeP"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"client = openai.OpenAI(\n",
|
||||
" api_key=\"anything\",\n",
|
||||
" base_url=\"http://0.0.0.0:4000\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# request sent to model set on litellm proxy, `litellm --model`\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
" messages = [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"this is a test request, write a short poem\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n",
|
||||
" \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n",
|
||||
" \"generation_name\": \"ishaan-generation-openai-client\",\n",
|
||||
" \"generation_id\": \"openai-client-gen-id22\",\n",
|
||||
" \"trace_id\": \"openai-client-trace-id22\",\n",
|
||||
" \"trace_user_id\": \"openai-client-user-id2\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Function Calling"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "AqkyKk9Scxgj"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from openai import OpenAI\n",
|
||||
"client = OpenAI(\n",
|
||||
" api_key=\"sk-1234\", # [OPTIONAL] set if you set one on proxy, else set \"\"\n",
|
||||
" base_url=\"http://0.0.0.0:4000\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"],\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n",
|
||||
"completion = client.chat.completions.create(\n",
|
||||
" model=\"gpt-4o\", # use 'model_name' from config.yaml\n",
|
||||
" messages=messages,\n",
|
||||
" tools=tools,\n",
|
||||
" tool_choice=\"auto\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(completion)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "wDg10VqLczE1"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Azure OpenAI Python SDK"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "YYoxLloSaNWW"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"client = openai.AzureOpenAI(\n",
|
||||
" api_key=\"anything\",\n",
|
||||
" base_url=\"http://0.0.0.0:4000\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# request sent to model set on litellm proxy, `litellm --model`\n",
|
||||
"response = client.chat.completions.create(\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
" messages = [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"this is a test request, write a short poem\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n",
|
||||
" \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n",
|
||||
" \"generation_name\": \"ishaan-generation-openai-client\",\n",
|
||||
" \"generation_id\": \"openai-client-gen-id22\",\n",
|
||||
" \"trace_id\": \"openai-client-trace-id22\",\n",
|
||||
" \"trace_user_id\": \"openai-client-user-id2\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "yA1XcgowaSRy"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Langchain Python"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "yl9qhDvnaTpL"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts.chat import (\n",
|
||||
" ChatPromptTemplate,\n",
|
||||
" HumanMessagePromptTemplate,\n",
|
||||
" SystemMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"anything\"\n",
|
||||
"\n",
|
||||
"chat = ChatOpenAI(\n",
|
||||
" openai_api_base=\"http://0.0.0.0:4000\",\n",
|
||||
" model = \"gpt-3.5-turbo\",\n",
|
||||
" temperature=0.1,\n",
|
||||
" extra_body={\n",
|
||||
" \"metadata\": {\n",
|
||||
" \"generation_name\": \"ishaan-generation-langchain-client\",\n",
|
||||
" \"generation_id\": \"langchain-client-gen-id22\",\n",
|
||||
" \"trace_id\": \"langchain-client-trace-id22\",\n",
|
||||
" \"trace_user_id\": \"langchain-client-user-id2\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(\n",
|
||||
" content=\"You are a helpful assistant that im using to make a test request to.\"\n",
|
||||
" ),\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"test from litellm. tell me why it's amazing in 1 sentence\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"response = chat(messages)\n",
|
||||
"\n",
|
||||
"print(response)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "5MUZgSquaW5t"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Curl"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "B9eMgnULbRaz"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"curl -X POST 'http://0.0.0.0:4000/chat/completions' \\\n",
|
||||
" -H 'Content-Type: application/json' \\\n",
|
||||
" -d '{\n",
|
||||
" \"model\": \"gpt-3.5-turbo\",\n",
|
||||
" \"messages\": [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"what llm are you\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"metadata\": {\n",
|
||||
" \"generation_name\": \"ishaan-test-generation\",\n",
|
||||
" \"generation_id\": \"gen-id22\",\n",
|
||||
" \"trace_id\": \"trace-id22\",\n",
|
||||
" \"trace_user_id\": \"user-id2\"\n",
|
||||
" }\n",
|
||||
"}'\n",
|
||||
"```\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "VWCCk5PFcmhS"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### LlamaIndex"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "drBAm2e1b6xe"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import os, dotenv\n",
|
||||
"\n",
|
||||
"from llama_index.llms import AzureOpenAI\n",
|
||||
"from llama_index.embeddings import AzureOpenAIEmbedding\n",
|
||||
"from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
|
||||
"\n",
|
||||
"llm = AzureOpenAI(\n",
|
||||
" engine=\"azure-gpt-3.5\", # model_name on litellm proxy\n",
|
||||
" temperature=0.0,\n",
|
||||
" azure_endpoint=\"http://0.0.0.0:4000\", # litellm proxy endpoint\n",
|
||||
" api_key=\"sk-1234\", # litellm proxy API Key\n",
|
||||
" api_version=\"2023-07-01-preview\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"embed_model = AzureOpenAIEmbedding(\n",
|
||||
" deployment_name=\"azure-embedding-model\",\n",
|
||||
" azure_endpoint=\"http://0.0.0.0:4000\",\n",
|
||||
" api_key=\"sk-1234\",\n",
|
||||
" api_version=\"2023-07-01-preview\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"documents = SimpleDirectoryReader(\"llama_index_data\").load_data()\n",
|
||||
"service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\n",
|
||||
"index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n",
|
||||
"\n",
|
||||
"query_engine = index.as_query_engine()\n",
|
||||
"response = query_engine.query(\"What did the author do growing up?\")\n",
|
||||
"print(response)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "d0bZcv8fb9mL"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Langchain JS"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "xypvNdHnb-Yy"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import { ChatOpenAI } from \"@langchain/openai\";\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"const model = new ChatOpenAI({\n",
|
||||
" modelName: \"gpt-4\",\n",
|
||||
" openAIApiKey: \"sk-1234\",\n",
|
||||
" modelKwargs: {\"metadata\": \"hello world\"} // 👈 PASS Additional params here\n",
|
||||
"}, {\n",
|
||||
" basePath: \"http://0.0.0.0:4000\",\n",
|
||||
"});\n",
|
||||
"\n",
|
||||
"const message = await model.invoke(\"Hi there!\");\n",
|
||||
"\n",
|
||||
"console.log(message);\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "R55mK2vCcBN2"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### OpenAI JS"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "nC4bLifCcCiW"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"const { OpenAI } = require('openai');\n",
|
||||
"\n",
|
||||
"const openai = new OpenAI({\n",
|
||||
" apiKey: \"sk-1234\", // This is the default and can be omitted\n",
|
||||
" baseURL: \"http://0.0.0.0:4000\"\n",
|
||||
"});\n",
|
||||
"\n",
|
||||
"async function main() {\n",
|
||||
" const chatCompletion = await openai.chat.completions.create({\n",
|
||||
" messages: [{ role: 'user', content: 'Say this is a test' }],\n",
|
||||
" model: 'gpt-3.5-turbo',\n",
|
||||
" }, {\"metadata\": {\n",
|
||||
" \"generation_name\": \"ishaan-generation-openaijs-client\",\n",
|
||||
" \"generation_id\": \"openaijs-client-gen-id22\",\n",
|
||||
" \"trace_id\": \"openaijs-client-trace-id22\",\n",
|
||||
" \"trace_user_id\": \"openaijs-client-user-id2\"\n",
|
||||
" }});\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"main();\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "MICH8kIMcFpg"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Anthropic SDK"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "D1Q07pEAcGTb"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from anthropic import Anthropic\n",
|
||||
"\n",
|
||||
"client = Anthropic(\n",
|
||||
" base_url=\"http://localhost:4000\", # proxy endpoint\n",
|
||||
" api_key=\"sk-s4xN1IiLTCytwtZFJaYQrA\", # litellm proxy virtual key\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"message = client.messages.create(\n",
|
||||
" max_tokens=1024,\n",
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"Hello, Claude\",\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" model=\"claude-3-opus-20240229\",\n",
|
||||
")\n",
|
||||
"print(message.content)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "qBjFcAvgcI3t"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## /embeddings"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "dFAR4AJGcONI"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### OpenAI Python SDK"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "lgNoM281cRzR"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"# set base_url to your proxy server\n",
|
||||
"# set api_key to send to proxy server\n",
|
||||
"client = OpenAI(api_key=\"<proxy-api-key>\", base_url=\"http://0.0.0.0:4000\")\n",
|
||||
"\n",
|
||||
"response = client.embeddings.create(\n",
|
||||
" input=[\"hello from litellm\"],\n",
|
||||
" model=\"text-embedding-ada-002\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(response)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "NY3DJhPfcQhA"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Langchain Embeddings"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "hmbg-DW6cUZs"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
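"# note: model names like \"sagemaker-embeddings\" below are assumed to be model_name aliases configured on the LiteLLM proxy, not provider model IDs\n",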
"embeddings = OpenAIEmbeddings(model=\"sagemaker-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"text = \"This is a test document.\"\n",
|
||||
"\n",
|
||||
"query_result = embeddings.embed_query(text)\n",
|
||||
"\n",
|
||||
"print(f\"SAGEMAKER EMBEDDINGS\")\n",
|
||||
"print(query_result[:5])\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"bedrock-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
|
||||
"\n",
|
||||
"text = \"This is a test document.\"\n",
|
||||
"\n",
|
||||
"query_result = embeddings.embed_query(text)\n",
|
||||
"\n",
|
||||
"print(f\"BEDROCK EMBEDDINGS\")\n",
|
||||
"print(query_result[:5])\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"bedrock-titan-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
|
||||
"\n",
|
||||
"text = \"This is a test document.\"\n",
|
||||
"\n",
|
||||
"query_result = embeddings.embed_query(text)\n",
|
||||
"\n",
|
||||
"print(f\"TITAN EMBEDDINGS\")\n",
|
||||
"print(query_result[:5])"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "lX2S8Nl1cWVP"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Curl Request"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "oqGbWBCQcYfd"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"```curl\n",
|
||||
"curl -X POST 'http://0.0.0.0:4000/embeddings' \\\n",
|
||||
" -H 'Content-Type: application/json' \\\n",
|
||||
" -d ' {\n",
|
||||
" \"model\": \"text-embedding-ada-002\",\n",
|
||||
" \"input\": [\"write a litellm poem\"]\n",
|
||||
" }'\n",
|
||||
"```\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "7rkIMV9LcdwQ"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
@@ -1,478 +0,0 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This is a tutorial on using Parallel function calling with LiteLLM"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "gHwFJ-srdnku"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "RrtHuVHlZmUe"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"This tutorial walks through the steps doing parallel function calling using\n",
|
||||
" - OpenAI\n",
|
||||
" - Azure OpenAI"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "sG5ANaazjU0g"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# set openai api key\n",
|
||||
"import os\n",
|
||||
"os.environ['OPENAI_API_KEY'] = \"\" # litellm reads OPENAI_API_KEY from .env and sends the request"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "l4GQ-M5yZ5UW"
|
||||
},
|
||||
"execution_count": 3,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"\n",
|
||||
"# OpenAI gpt-3.5-turbo-1106\n",
|
||||
"## Step 1: send the conversation and available functions to the model"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "AxgR2fCgaRoW"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import litellm\n",
|
||||
"import json\n",
|
||||
"# Example dummy function hard coded to return the same weather\n",
|
||||
"# In production, this could be your backend API or an external API\n",
|
||||
"def get_current_weather(location, unit=\"fahrenheit\"):\n",
|
||||
" \"\"\"Get the current weather in a given location\"\"\"\n",
|
||||
" if \"tokyo\" in location.lower():\n",
|
||||
" return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
|
||||
" elif \"san francisco\" in location.lower():\n",
|
||||
" return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
|
||||
" elif \"paris\" in location.lower():\n",
|
||||
" return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
|
||||
" else:\n",
|
||||
" return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n",
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"],\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response = litellm.completion(\n",
|
||||
" model=\"gpt-3.5-turbo-1106\",\n",
|
||||
" messages=messages,\n",
|
||||
" tools=tools,\n",
|
||||
" tool_choice=\"auto\", # auto is default, but we'll be explicit\n",
|
||||
")\n",
|
||||
"print(\"\\nLLM Response1:\\n\", response)\n",
|
||||
"response_message = response.choices[0].message\n",
|
||||
"tool_calls = response.choices[0].message.tool_calls\n",
|
||||
"print(\"\\nTool Choice:\\n\", tool_calls)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "Y3qteFo8ZrZP",
|
||||
"outputId": "ee6c1183-55c1-4111-cdc0-967b8fed9db3"
|
||||
},
|
||||
"execution_count": 18,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"LLM Response1:\n",
|
||||
" ModelResponse(id='chatcmpl-8MNdPbrhtnwiPK1x3PEoGwrH144TW', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content=None, role='assistant', tool_calls=[ChatCompletionMessageToolCall(id='call_K2Giwoq3NloGPfSv25MJVFZG', function=Function(arguments='{\"location\": \"San Francisco\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_6K8bYCZK6qsbMY3n51FzE5Nz', function=Function(arguments='{\"location\": \"Tokyo\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_cKSmUEJGufDwS7TaUHWzp7qx', function=Function(arguments='{\"location\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')]))], created=1700344759, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage={'completion_tokens': 77, 'prompt_tokens': 88, 'total_tokens': 165}, _response_ms=1049.913)\n",
|
||||
"\n",
|
||||
"Tool Choice:\n",
|
||||
" [ChatCompletionMessageToolCall(id='call_K2Giwoq3NloGPfSv25MJVFZG', function=Function(arguments='{\"location\": \"San Francisco\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_6K8bYCZK6qsbMY3n51FzE5Nz', function=Function(arguments='{\"location\": \"Tokyo\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_cKSmUEJGufDwS7TaUHWzp7qx', function=Function(arguments='{\"location\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')]\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Step 2 - Parse the Model Response and Execute Functions"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "tD4lJQ40cU44"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Check if the model wants to call a function\n",
|
||||
"if tool_calls:\n",
|
||||
" # Execute the functions and prepare responses\n",
|
||||
" available_functions = {\n",
|
||||
" \"get_current_weather\": get_current_weather,\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" messages.append(response_message) # Extend conversation with assistant's reply\n",
|
||||
"\n",
|
||||
" for tool_call in tool_calls:\n",
|
||||
" print(f\"\\nExecuting tool call\\n{tool_call}\")\n",
|
||||
" function_name = tool_call.function.name\n",
|
||||
" function_to_call = available_functions[function_name]\n",
|
||||
" function_args = json.loads(tool_call.function.arguments)\n",
|
||||
" # calling the get_current_weather() function\n",
|
||||
" function_response = function_to_call(\n",
|
||||
" location=function_args.get(\"location\"),\n",
|
||||
" unit=function_args.get(\"unit\"),\n",
|
||||
" )\n",
|
||||
" print(f\"Result from tool call\\n{function_response}\\n\")\n",
|
||||
"\n",
|
||||
" # Extend conversation with function response\n",
|
||||
" messages.append(\n",
|
||||
" {\n",
|
||||
" \"tool_call_id\": tool_call.id,\n",
|
||||
" \"role\": \"tool\",\n",
|
||||
" \"name\": function_name,\n",
|
||||
" \"content\": function_response,\n",
|
||||
" }\n",
|
||||
" )\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "af4oXQvicV_n",
|
||||
"outputId": "abf6ac3e-4a21-4a4f-b8d7-809b763d0632"
|
||||
},
|
||||
"execution_count": 21,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"Executing tool call\n",
|
||||
"ChatCompletionMessageToolCall(id='call_K2Giwoq3NloGPfSv25MJVFZG', function=Function(arguments='{\"location\": \"San Francisco\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')\n",
|
||||
"Result from tool call\n",
|
||||
"{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Executing tool call\n",
|
||||
"ChatCompletionMessageToolCall(id='call_6K8bYCZK6qsbMY3n51FzE5Nz', function=Function(arguments='{\"location\": \"Tokyo\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')\n",
|
||||
"Result from tool call\n",
|
||||
"{\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Executing tool call\n",
|
||||
"ChatCompletionMessageToolCall(id='call_cKSmUEJGufDwS7TaUHWzp7qx', function=Function(arguments='{\"location\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function')\n",
|
||||
"Result from tool call\n",
|
||||
"{\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"}\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Step 3 - Second litellm.completion() call"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "E3OL1fqUdFdv"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
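"# messages now contains the user prompt, the assistant's tool calls, and each tool's result, so this second call can produce the final natural-language answer\n",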
"second_response = litellm.completion(\n",
|
||||
" model=\"gpt-3.5-turbo-1106\",\n",
|
||||
" messages=messages,\n",
|
||||
")\n",
|
||||
"print(\"Second Response\\n\", second_response)\n",
|
||||
"print(\"Second Response Message\\n\", second_response.choices[0].message.content)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "8KYB2n-jc1_f",
|
||||
"outputId": "6c6448ae-1c09-43ae-eb90-208b118e6179"
|
||||
},
|
||||
"execution_count": 26,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Second Response\n",
|
||||
" ModelResponse(id='chatcmpl-8MNhat166ZqjO6egXcUh85Pd0s7KV', choices=[Choices(finish_reason='stop', index=0, message=Message(content=\"The current weather in San Francisco is 72°F, in Tokyo it's 10°C, and in Paris it's 22°C.\", role='assistant'))], created=1700345018, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage={'completion_tokens': 28, 'prompt_tokens': 465, 'total_tokens': 493}, _response_ms=999.246)\n",
|
||||
"Second Response Message\n",
|
||||
" The current weather in San Francisco is 72°F, in Tokyo it's 10°C, and in Paris it's 22°C.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Using Azure OpenAI"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "1cIIFEvXjofp"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# set Azure env variables\n",
|
||||
"import os\n",
|
||||
"os.environ['AZURE_API_KEY'] = \"\" # litellm reads AZURE_API_KEY from .env and sends the request\n",
|
||||
"os.environ['AZURE_API_BASE'] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n",
|
||||
"os.environ['AZURE_API_VERSION'] = \"2023-07-01-preview\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "lG9mUnModeeE"
|
||||
},
|
||||
"execution_count": 32,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Step 1"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "17S-Ysksj-E_"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import litellm\n",
|
||||
"import json\n",
|
||||
"# Example dummy function hard coded to return the same weather\n",
|
||||
"# In production, this could be your backend API or an external API\n",
|
||||
"def get_current_weather(location, unit=\"fahrenheit\"):\n",
|
||||
" \"\"\"Get the current weather in a given location\"\"\"\n",
|
||||
" if \"tokyo\" in location.lower():\n",
|
||||
" return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
|
||||
" elif \"san francisco\" in location.lower():\n",
|
||||
" return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
|
||||
" elif \"paris\" in location.lower():\n",
|
||||
" return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
|
||||
" else:\n",
|
||||
" return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n",
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"],\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"response = litellm.completion(\n",
|
||||
" model=\"azure/chatgpt-functioncalling\", # model = azure/<your-azure-deployment-name>\n",
|
||||
" messages=messages,\n",
|
||||
" tools=tools,\n",
|
||||
" tool_choice=\"auto\", # auto is default, but we'll be explicit\n",
|
||||
")\n",
|
||||
"print(\"\\nLLM Response1:\\n\", response)\n",
|
||||
"response_message = response.choices[0].message\n",
|
||||
"tool_calls = response.choices[0].message.tool_calls\n",
|
||||
"print(\"\\nTool Choice:\\n\", tool_calls)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "boAIHLEXj80m",
|
||||
"outputId": "00afcf09-5b6b-4805-c374-ba089cc6eb43"
|
||||
},
|
||||
"execution_count": 33,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"LLM Response1:\n",
|
||||
" ModelResponse(id='chatcmpl-8MOBPvEnqG7qitkmVqZmCrzSGEmDj', choices=[Choices(finish_reason='tool_calls', index=0, message=Message(content=None, role='assistant', tool_calls=[ChatCompletionMessageToolCall(id='call_7gZ0PkmmmgzTOxfF01ATp0U5', function=Function(arguments='{\\n \"location\": \"San Francisco, CA\"\\n}', name='get_current_weather'), type='function')]))], created=1700346867, model='gpt-35-turbo', object='chat.completion', system_fingerprint=None, usage={'completion_tokens': 19, 'prompt_tokens': 88, 'total_tokens': 107}, _response_ms=833.4319999999999)\n",
|
||||
"\n",
|
||||
"Tool Choice:\n",
|
||||
" [ChatCompletionMessageToolCall(id='call_7gZ0PkmmmgzTOxfF01ATp0U5', function=Function(arguments='{\\n \"location\": \"San Francisco, CA\"\\n}', name='get_current_weather'), type='function')]\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Step 2"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "hqh1y1IMkmGO"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Check if the model wants to call a function\n",
|
||||
"if tool_calls:\n",
|
||||
" # Execute the functions and prepare responses\n",
|
||||
" available_functions = {\n",
|
||||
" \"get_current_weather\": get_current_weather,\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" messages.append(response_message) # Extend conversation with assistant's reply\n",
|
||||
"\n",
|
||||
" for tool_call in tool_calls:\n",
|
||||
" print(f\"\\nExecuting tool call\\n{tool_call}\")\n",
|
||||
" function_name = tool_call.function.name\n",
|
||||
" function_to_call = available_functions[function_name]\n",
|
||||
" function_args = json.loads(tool_call.function.arguments)\n",
|
||||
" # calling the get_current_weather() function\n",
|
||||
" function_response = function_to_call(\n",
|
||||
" location=function_args.get(\"location\"),\n",
|
||||
" unit=function_args.get(\"unit\"),\n",
|
||||
" )\n",
|
||||
" print(f\"Result from tool call\\n{function_response}\\n\")\n",
|
||||
"\n",
|
||||
" # Extend conversation with function response\n",
|
||||
" messages.append(\n",
|
||||
" {\n",
|
||||
" \"tool_call_id\": tool_call.id,\n",
|
||||
" \"role\": \"tool\",\n",
|
||||
" \"name\": function_name,\n",
|
||||
" \"content\": function_response,\n",
|
||||
" }\n",
|
||||
" )\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "FGu7DY7PkOiG",
|
||||
"outputId": "96d39ae7-7fc8-4dd8-c82f-5ee9a486724c"
|
||||
},
|
||||
"execution_count": 34,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"Executing tool call\n",
|
||||
"ChatCompletionMessageToolCall(id='call_7gZ0PkmmmgzTOxfF01ATp0U5', function=Function(arguments='{\\n \"location\": \"San Francisco, CA\"\\n}', name='get_current_weather'), type='function')\n",
|
||||
"Result from tool call\n",
|
||||
"{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Step 3"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "4MjYyeajkpBl"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"second_response = litellm.completion(\n",
|
||||
" model=\"azure/chatgpt-functioncalling\",\n",
|
||||
" messages=messages,\n",
|
||||
")\n",
|
||||
"print(\"Second Response\\n\", second_response)\n",
|
||||
"print(\"Second Response Message\\n\", second_response.choices[0].message.content)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "qHgXyZq1kqGn",
|
||||
"outputId": "61a30470-d7f5-484d-c42b-681c9b60b34a"
|
||||
},
|
||||
"execution_count": 36,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Second Response\n",
|
||||
" ModelResponse(id='chatcmpl-8MOC90vwZ2LHX0DE796XYtsOxdGcc', choices=[Choices(finish_reason='stop', index=0, message=Message(content='The current weather in San Francisco is 72°F.', role='assistant'))], created=1700346913, model='gpt-35-turbo', object='chat.completion', system_fingerprint=None, usage={'completion_tokens': 11, 'prompt_tokens': 69, 'total_tokens': 80}, _response_ms=824.882)\n",
|
||||
"Second Response Message\n",
|
||||
" The current weather in San Francisco is 72°F.\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@@ -1,205 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "680oRk1af-xJ"
|
||||
},
|
||||
"source": [
|
||||
"# Environment Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "X7TgJFn8f88p"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import csv\n",
|
||||
"from typing import Optional\n",
|
||||
"import httpx\n",
|
||||
"import json\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"proxy_base_url = \"http://0.0.0.0:4000\" # 👈 SET TO PROXY URL\n",
|
||||
"master_key = \"sk-1234\" # 👈 SET TO PROXY MASTER KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "rauw8EOhgBz5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## GLOBAL HTTP CLIENT ## - faster http calls\n",
|
||||
"class HTTPHandler:\n",
|
||||
" def __init__(self, concurrent_limit=1000):\n",
|
||||
" # Create a client with a connection pool\n",
|
||||
" self.client = httpx.AsyncClient(\n",
|
||||
" limits=httpx.Limits(\n",
|
||||
" max_connections=concurrent_limit,\n",
|
||||
" max_keepalive_connections=concurrent_limit,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" async def close(self):\n",
|
||||
" # Close the client when you're done with it\n",
|
||||
" await self.client.aclose()\n",
|
||||
"\n",
|
||||
" async def get(\n",
|
||||
" self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None\n",
|
||||
" ):\n",
|
||||
" response = await self.client.get(url, params=params, headers=headers)\n",
|
||||
" return response\n",
|
||||
"\n",
|
||||
" async def post(\n",
|
||||
" self,\n",
|
||||
" url: str,\n",
|
||||
" data: Optional[dict] = None,\n",
|
||||
" params: Optional[dict] = None,\n",
|
||||
" headers: Optional[dict] = None,\n",
|
||||
" ):\n",
|
||||
" try:\n",
|
||||
" response = await self.client.post(\n",
|
||||
" url, data=data, params=params, headers=headers\n",
|
||||
" )\n",
|
||||
" return response\n",
|
||||
" except Exception as e:\n",
|
||||
" raise e\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "7LXN8zaLgOie"
|
||||
},
|
||||
"source": [
|
||||
"# Import Sheet\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Format: | ID | Name | Max Budget |"
|
||||
]
|
||||
},
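For reference, a minimal sketch of what `my-batch-sheet.csv` is assumed to look like; the IDs, names, and budgets below are made-up examples, but the column names match what `import_sheet()` reads:

```python
import csv

# hypothetical example rows - replace with your real users
rows = [
    {"ID": "user-001", "Name": "Alice", "Max Budget": "10"},
    {"ID": "user-002", "Name": "Bob", "Max Budget": "25"},
]

# write the sheet with the exact headers the import code expects
with open("my-batch-sheet.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["ID", "Name", "Max Budget"])
    writer.writeheader()
    writer.writerows(rows)
```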
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "oiED0usegPGf"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"async def import_sheet():\n",
|
||||
" tasks = []\n",
|
||||
" http_client = HTTPHandler()\n",
|
||||
" with open('my-batch-sheet.csv', 'r') as file:\n",
|
||||
" csv_reader = csv.DictReader(file)\n",
|
||||
" for row in csv_reader:\n",
|
||||
" task = create_user(client=http_client, user_id=row['ID'], max_budget=row['Max Budget'], user_name=row['Name'])\n",
|
||||
" tasks.append(task)\n",
|
||||
" # print(f\"ID: {row['ID']}, Name: {row['Name']}, Max Budget: {row['Max Budget']}\")\n",
|
||||
"\n",
|
||||
" keys = await asyncio.gather(*tasks)\n",
|
||||
"\n",
|
||||
" with open('my-batch-sheet_new.csv', 'w', newline='') as new_file:\n",
|
||||
" fieldnames = ['ID', 'Name', 'Max Budget', 'keys']\n",
|
||||
" csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames)\n",
|
||||
" csv_writer.writeheader()\n",
|
||||
"\n",
|
||||
" with open('my-batch-sheet.csv', 'r') as file:\n",
|
||||
" csv_reader = csv.DictReader(file)\n",
|
||||
" for i, row in enumerate(csv_reader):\n",
|
||||
" row['keys'] = keys[i] # Add the 'keys' value from the corresponding task result\n",
|
||||
" csv_writer.writerow(row)\n",
|
||||
"\n",
|
||||
" await http_client.close()\n",
|
||||
"\n",
|
||||
"asyncio.run(import_sheet())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "E7M0Li_UgJeZ"
|
||||
},
|
||||
"source": [
|
||||
"# Create Users + Keys\n",
|
||||
"\n",
|
||||
"- Creates a user\n",
|
||||
"- Creates a key with max budget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "NZudRFujf7j-"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"async def create_key_with_alias(client: HTTPHandler, user_id: str, max_budget: float):\n",
|
||||
" global proxy_base_url\n",
|
||||
" if not proxy_base_url.endswith(\"/\"):\n",
|
||||
" proxy_base_url += \"/\"\n",
|
||||
" url = proxy_base_url + \"key/generate\"\n",
|
||||
"\n",
|
||||
" # call /key/generate\n",
|
||||
" print(\"CALLING /KEY/GENERATE\")\n",
|
||||
" response = await client.post(\n",
|
||||
" url=url,\n",
|
||||
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
|
||||
" data=json.dumps({\n",
|
||||
" \"user_id\": user_id,\n",
|
||||
" \"key_alias\": f\"{user_id}-key\",\n",
|
||||
" \"max_budget\": max_budget # 👈 KEY CHANGE: SETS MAX BUDGET PER KEY\n",
|
||||
" })\n",
|
||||
" )\n",
|
||||
" print(f\"response: {response.text}\")\n",
|
||||
" return response.json()[\"key\"]\n",
|
||||
"\n",
|
||||
"async def create_user(client: HTTPHandler, user_id: str, max_budget: float, user_name: str):\n",
|
||||
" \"\"\"\n",
|
||||
" - call /user/new\n",
|
||||
" - create key for user\n",
|
||||
" \"\"\"\n",
|
||||
" global proxy_base_url\n",
|
||||
" if not proxy_base_url.endswith(\"/\"):\n",
|
||||
" proxy_base_url += \"/\"\n",
|
||||
" url = proxy_base_url + \"user/new\"\n",
|
||||
"\n",
|
||||
" # call /user/new\n",
|
||||
" await client.post(\n",
|
||||
" url=url,\n",
|
||||
" headers={\"Authorization\": f\"Bearer {master_key}\"},\n",
|
||||
" data=json.dumps({\n",
|
||||
" \"user_id\": user_id,\n",
|
||||
" \"user_alias\": user_name,\n",
|
||||
" \"auto_create_key\": False,\n",
|
||||
" # \"max_budget\": max_budget # 👈 [OPTIONAL] Sets max budget per user (if you want to set a max budget across keys)\n",
|
||||
" })\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # create key for user\n",
|
||||
" return await create_key_with_alias(client=client, user_id=user_id, max_budget=max_budget)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@@ -1,157 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "eKXncoQbU_2j"
|
||||
},
|
||||
"source": [
|
||||
"# Using Nemo-Guardrails with LiteLLM Server\n",
|
||||
"\n",
|
||||
"[Call Bedrock, TogetherAI, Huggingface, etc. on the server](https://docs.litellm.ai/docs/providers)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "ZciYaLwvuFbu"
|
||||
},
|
||||
"source": [
|
||||
"## Using with Bedrock\n",
|
||||
"\n",
|
||||
"`docker run -e PORT=8000 -e AWS_ACCESS_KEY_ID=<your-aws-access-key> -e AWS_SECRET_ACCESS_KEY=<your-aws-secret-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "vOUwGSJ2Vsy3"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install nemoguardrails langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "xXEJNxe7U0IN"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model_name=\"anthropic.claude-v2\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-fake-key\")\n",
|
||||
"\n",
|
||||
"from nemoguardrails import LLMRails, RailsConfig\n",
|
||||
"\n",
|
||||
"config = RailsConfig.from_path(\"./config.yml\")\n",
|
||||
"app = LLMRails(config, llm=llm)\n",
|
||||
"\n",
|
||||
"new_message = app.generate(messages=[{\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"Hello! What can you do for me?\"\n",
|
||||
"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "vz5n00qyuKjp"
|
||||
},
|
||||
"source": [
|
||||
"## Using with TogetherAI\n",
|
||||
"\n",
|
||||
"1. You can either set this in the server environment:\n",
|
||||
"`docker run -e PORT=8000 -e TOGETHERAI_API_KEY=<your-together-ai-api-key> -p 8000:8000 ghcr.io/berriai/litellm:latest`\n",
|
||||
"\n",
|
||||
"2. **Or** Pass this in as the api key `(...openai_api_key=\"<your-together-ai-api-key>\")`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "XK1sk-McuhpE"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model_name=\"together_ai/togethercomputer/CodeLlama-13b-Instruct\", openai_api_base=\"http://0.0.0.0:8000\", openai_api_key=\"my-together-ai-api-key\")\n",
|
||||
"\n",
|
||||
"from nemoguardrails import LLMRails, RailsConfig\n",
|
||||
"\n",
|
||||
"config = RailsConfig.from_path(\"./config.yml\")\n",
|
||||
"app = LLMRails(config, llm=llm)\n",
|
||||
"\n",
|
||||
"new_message = app.generate(messages=[{\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"Hello! What can you do for me?\"\n",
|
||||
"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "8A1KWKnzuxAS"
|
||||
},
|
||||
"source": [
|
||||
"### CONFIG.YML\n",
|
||||
"\n",
|
||||
"save this example `config.yml` in your current directory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "NKN1GmSvu0Cx"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# instructions:\n",
|
||||
"# - type: general\n",
|
||||
"# content: |\n",
|
||||
"# Below is a conversation between a bot and a user about the recent job reports.\n",
|
||||
"# The bot is factual and concise. If the bot does not know the answer to a\n",
|
||||
"# question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"# sample_conversation: |\n",
|
||||
"# user \"Hello there!\"\n",
|
||||
"# express greeting\n",
|
||||
"# bot express greeting\n",
|
||||
"# \"Hello! How can I assist you today?\"\n",
|
||||
"# user \"What can you do for me?\"\n",
|
||||
"# ask about capabilities\n",
|
||||
"# bot respond about capabilities\n",
|
||||
"# \"I am an AI assistant that helps answer mathematical questions. My core mathematical skills are powered by wolfram alpha.\"\n",
|
||||
"# user \"What's 2+2?\"\n",
|
||||
"# ask math question\n",
|
||||
"# bot responds to math question\n",
|
||||
"# \"2+2 is equal to 4.\"\n",
|
||||
"\n",
|
||||
"# models:\n",
|
||||
"# - type: main\n",
|
||||
"# engine: openai\n",
|
||||
"# model: claude-instant-1"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,404 +0,0 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"machine_shape": "hm",
|
||||
"gpuType": "V100"
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"accelerator": "GPU"
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Set up Environment"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "vDOm5wfjdFLP"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"!pip install --upgrade litellm"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Bx6mAA6MHiy_"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "zIYv7JTyxSxR",
|
||||
"outputId": "53890320-f9fa-4bf4-8362-0f17f52c6ed4"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Successfully installed fastapi-0.103.1 h11-0.14.0 huggingface-hub-0.16.4 ninja-1.11.1 pydantic-1.10.12 ray-2.6.3 safetensors-0.3.3 sentencepiece-0.1.99 starlette-0.27.0 tokenizers-0.13.3 transformers-4.33.1 uvicorn-0.23.2 vllm-0.1.4 xformers-0.0.21\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install vllm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Load the Logs"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "RMcoAni6WKEx"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import pandas as pd"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "zchxB8c7WJe5"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# path of the csv file\n",
|
||||
"file_path = 'Model-prompts-example.csv'\n",
|
||||
"\n",
|
||||
"# load the csv file as a pandas DataFrame\n",
|
||||
"data = pd.read_csv(file_path)\n",
|
||||
"\n",
|
||||
"data.head()"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 81
|
||||
},
|
||||
"id": "aKcWr015WNPm",
|
||||
"outputId": "6e226773-333f-46a2-9fc8-4f54f309d204"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
" Success Timestamp Input \\\n",
|
||||
"0 True 1694041195 This is the templated query input \n",
|
||||
"\n",
|
||||
" Output RunId (Wandb Runid) \\\n",
|
||||
"0 This is the query output from the model 8hlumwuk \n",
|
||||
"\n",
|
||||
" Model ID (or Name) \n",
|
||||
"0 OpenAI/Turbo-3.5 "
|
||||
],
|
||||
"text/html": [
|
||||
"\n",
|
||||
" <div id=\"df-cd06d09e-fb43-41b0-938f-37f9d285ae66\" class=\"colab-df-container\">\n",
|
||||
" <div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Success</th>\n",
|
||||
" <th>Timestamp</th>\n",
|
||||
" <th>Input</th>\n",
|
||||
" <th>Output</th>\n",
|
||||
" <th>RunId (Wandb Runid)</th>\n",
|
||||
" <th>Model ID (or Name)</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>1694041195</td>\n",
|
||||
" <td>This is the templated query input</td>\n",
|
||||
" <td>This is the query output from the model</td>\n",
|
||||
" <td>8hlumwuk</td>\n",
|
||||
" <td>OpenAI/Turbo-3.5</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
" <div class=\"colab-df-buttons\">\n",
|
||||
"\n",
|
||||
" <div class=\"colab-df-container\">\n",
|
||||
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cd06d09e-fb43-41b0-938f-37f9d285ae66')\"\n",
|
||||
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||||
" style=\"display:none;\">\n",
|
||||
"\n",
|
||||
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
|
||||
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
|
||||
" </svg>\n",
|
||||
" </button>\n",
|
||||
"\n",
|
||||
" <style>\n",
|
||||
" .colab-df-container {\n",
|
||||
" display:flex;\n",
|
||||
" gap: 12px;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-convert {\n",
|
||||
" background-color: #E8F0FE;\n",
|
||||
" border: none;\n",
|
||||
" border-radius: 50%;\n",
|
||||
" cursor: pointer;\n",
|
||||
" display: none;\n",
|
||||
" fill: #1967D2;\n",
|
||||
" height: 32px;\n",
|
||||
" padding: 0 0 0 0;\n",
|
||||
" width: 32px;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-convert:hover {\n",
|
||||
" background-color: #E2EBFA;\n",
|
||||
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||||
" fill: #174EA6;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .colab-df-buttons div {\n",
|
||||
" margin-bottom: 4px;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" [theme=dark] .colab-df-convert {\n",
|
||||
" background-color: #3B4455;\n",
|
||||
" fill: #D2E3FC;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" [theme=dark] .colab-df-convert:hover {\n",
|
||||
" background-color: #434B5C;\n",
|
||||
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||||
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||||
" fill: #FFFFFF;\n",
|
||||
" }\n",
|
||||
" </style>\n",
|
||||
"\n",
|
||||
" <script>\n",
|
||||
" const buttonEl =\n",
|
||||
" document.querySelector('#df-cd06d09e-fb43-41b0-938f-37f9d285ae66 button.colab-df-convert');\n",
|
||||
" buttonEl.style.display =\n",
|
||||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||||
"\n",
|
||||
" async function convertToInteractive(key) {\n",
|
||||
" const element = document.querySelector('#df-cd06d09e-fb43-41b0-938f-37f9d285ae66');\n",
|
||||
" const dataTable =\n",
|
||||
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||||
" [key], {});\n",
|
||||
" if (!dataTable) return;\n",
|
||||
"\n",
|
||||
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||||
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||||
" + ' to learn more about interactive tables.';\n",
|
||||
" element.innerHTML = '';\n",
|
||||
" dataTable['output_type'] = 'display_data';\n",
|
||||
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||||
" const docLink = document.createElement('div');\n",
|
||||
" docLink.innerHTML = docLinkHtml;\n",
|
||||
" element.appendChild(docLink);\n",
|
||||
" }\n",
|
||||
" </script>\n",
|
||||
" </div>\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" </div>\n",
|
||||
" </div>\n"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 6
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"input_texts = data['Input'].values"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "0DbL-kirWUyn"
|
||||
},
|
||||
"execution_count": 7,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"messages = [[{\"role\": \"user\", \"content\": input_text}] for input_text in input_texts]"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "cqpAvy8hWXyC"
|
||||
},
|
||||
"execution_count": 8,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Running Inference"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "SugCyom0Xy8U"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from litellm import batch_completion\n",
|
||||
"model_name = \"facebook/opt-125m\"\n",
|
||||
"provider = \"vllm\"\n",
|
||||
"response_list = batch_completion(\n",
|
||||
" model=model_name,\n",
|
||||
" custom_llm_provider=provider, # can easily switch to huggingface, replicate, together ai, sagemaker, etc.\n",
|
||||
" messages=messages,\n",
|
||||
" temperature=0.2,\n",
|
||||
" max_tokens=80,\n",
|
||||
" )"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "qpikx3uxHns3"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"response_list"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "QDPikHtwKJJ2",
|
||||
"outputId": "06f47c44-e258-452a-f9db-232a5b6d2810"
|
||||
},
|
||||
"execution_count": 10,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[<ModelResponse at 0x7e5b87616750> JSON: {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \".\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1694053363.6139505,\n",
|
||||
" \"model\": \"facebook/opt-125m\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 9,\n",
|
||||
" \"completion_tokens\": 80,\n",
|
||||
" \"total_tokens\": 89\n",
|
||||
" }\n",
|
||||
" }]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 10
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"response_values = [response['choices'][0]['message']['content'] for response in response_list]"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "SYqTcCiJbQDF"
|
||||
},
|
||||
"execution_count": 11,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"response_values"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "wqs-Oy9FbiPo",
|
||||
"outputId": "16a6a7b7-97c8-4b5b-eff8-09ea5eb5ad06"
|
||||
},
|
||||
"execution_count": 12,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is used to query the data.\\n\\nThe query input is the query input that is']"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 12
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"data[f\"{model_name}_output\"] = response_values"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "mElNbBehbkrz"
|
||||
},
|
||||
"execution_count": 13,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"data.to_csv('model_responses.csv', index=False)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "F06NXssDc45k"
|
||||
},
|
||||
"execution_count": 14,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
@@ -1,90 +0,0 @@
|
||||
from litellm import completion, completion_cost
|
||||
import time
|
||||
import click
|
||||
from tqdm import tqdm
|
||||
from tabulate import tabulate
|
||||
from termcolor import colored
|
||||
import os
|
||||
|
||||
|
||||
# Define the list of models to benchmark
|
||||
# select any LLM listed here: https://docs.litellm.ai/docs/providers
|
||||
models = ["gpt-3.5-turbo", "claude-2"]
|
||||
|
||||
# Enter LLM API keys
|
||||
# https://docs.litellm.ai/docs/providers
|
||||
os.environ["OPENAI_API_KEY"] = ""
|
||||
os.environ["ANTHROPIC_API_KEY"] = ""
|
||||
|
||||
# List of questions to benchmark (replace with your questions)
|
||||
questions = ["When will BerriAI IPO?", "When will LiteLLM hit $100M ARR?"]
|
||||
|
||||
# Enter your system prompt here
|
||||
system_prompt = """
|
||||
You are LiteLLM's helpful assistant
|
||||
"""
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option(
|
||||
"--system-prompt",
|
||||
default="You are a helpful assistant that can answer questions.",
|
||||
help="System prompt for the conversation.",
|
||||
)
|
||||
def main(system_prompt):
|
||||
for question in questions:
|
||||
data = [] # Data for the current question
|
||||
|
||||
with tqdm(total=len(models)) as pbar:
|
||||
for model in models:
|
||||
colored_description = colored(
|
||||
f"Running question: {question} for model: {model}", "green"
|
||||
)
|
||||
pbar.set_description(colored_description)
|
||||
start_time = time.time()
|
||||
|
||||
response = completion(
|
||||
model=model,
|
||||
max_tokens=500,
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": question},
|
||||
],
|
||||
)
|
||||
|
||||
end = time.time()
|
||||
total_time = end - start_time
|
||||
cost = completion_cost(completion_response=response)
|
||||
raw_response = response["choices"][0]["message"]["content"]
|
||||
|
||||
data.append(
|
||||
{
|
||||
"Model": colored(model, "light_blue"),
|
||||
"Response": raw_response, # Colorize the response
|
||||
"ResponseTime": colored(f"{total_time:.2f} seconds", "red"),
|
||||
"Cost": colored(f"${cost:.6f}", "green"), # Colorize the cost
|
||||
}
|
||||
)
|
||||
|
||||
pbar.update(1)
|
||||
|
||||
# Separate headers from the data
|
||||
headers = ["Model", "Response", "Response Time (seconds)", "Cost ($)"]
|
||||
colwidths = [15, 80, 15, 10]
|
||||
|
||||
# Create a nicely formatted table for the current question
|
||||
table = tabulate(
|
||||
[list(d.values()) for d in data],
|
||||
headers,
|
||||
tablefmt="grid",
|
||||
maxcolwidths=colwidths,
|
||||
)
|
||||
|
||||
# Print the table for the current question
|
||||
colored_question = colored(question, "green")
|
||||
click.echo(f"\nBenchmark Results for '{colored_question}':")
|
||||
click.echo(table) # Display the formatted table
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,30 +0,0 @@
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import litellm
|
||||
|
||||
from autoevals.llm import *
|
||||
|
||||
###################
|
||||
|
||||
# litellm completion call
|
||||
question = "which country has the highest population"
|
||||
response = litellm.completion(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[{"role": "user", "content": question}],
|
||||
)
|
||||
print(response)
|
||||
# use the auto eval Factuality() evaluator
|
||||
|
||||
print("calling evaluator")
|
||||
evaluator = Factuality()
|
||||
result = evaluator(
|
||||
output=response.choices[0]["message"][
|
||||
"content"
|
||||
], # response from litellm.completion()
|
||||
expected="India", # expected output
|
||||
input=question, # question passed to litellm.completion
|
||||
)
|
||||
|
||||
print(result)
|
@@ -1,181 +0,0 @@
|
||||
<h1 align="center">
|
||||
LLM-Bench
|
||||
</h1>
|
||||
<p align="center">
|
||||
<p align="center">Benchmark LLMs response, cost and response time</p>
|
||||
<p>LLM vs Cost per input + output token ($)</p>
|
||||
<img width="806" alt="Screenshot 2023-11-13 at 2 51 06 PM" src="https://github.com/BerriAI/litellm/assets/29436595/6d1bed71-d062-40b8-a113-28359672636a">
|
||||
</p>
|
||||
<a href="https://docs.google.com/spreadsheets/d/1mvPbP02OLFgc-5-Ubn1KxGuQQdbMyG1jhMSWxAldWy4/edit?usp=sharing">
|
||||
Bar Graph Excel Sheet here
|
||||
</a>
|
||||
|
||||
| Model | Provider | Cost per input + output token ($) |
|
||||
| --- | --- | --- |
|
||||
| openrouter/mistralai/mistral-7b-instruct | openrouter | 0.0 |
|
||||
| ollama/llama2 | ollama | 0.0 |
|
||||
| ollama/llama2:13b | ollama | 0.0 |
|
||||
| ollama/llama2:70b | ollama | 0.0 |
|
||||
| ollama/llama2-uncensored | ollama | 0.0 |
|
||||
| ollama/mistral | ollama | 0.0 |
|
||||
| ollama/codellama | ollama | 0.0 |
|
||||
| ollama/orca-mini | ollama | 0.0 |
|
||||
| ollama/vicuna | ollama | 0.0 |
|
||||
| perplexity/codellama-34b-instruct | perplexity | 0.0 |
|
||||
| perplexity/llama-2-13b-chat | perplexity | 0.0 |
|
||||
| perplexity/llama-2-70b-chat | perplexity | 0.0 |
|
||||
| perplexity/mistral-7b-instruct | perplexity | 0.0 |
|
||||
| perplexity/replit-code-v1.5-3b | perplexity | 0.0 |
|
||||
| text-bison | vertex_ai-text-models | 0.00000025 |
|
||||
| text-bison@001 | vertex_ai-text-models | 0.00000025 |
|
||||
| chat-bison | vertex_ai-chat-models | 0.00000025 |
|
||||
| chat-bison@001 | vertex_ai-chat-models | 0.00000025 |
|
||||
| chat-bison-32k | vertex_ai-chat-models | 0.00000025 |
|
||||
| code-bison | vertex_ai-code-text-models | 0.00000025 |
|
||||
| code-bison@001 | vertex_ai-code-text-models | 0.00000025 |
|
||||
| code-gecko@001 | vertex_ai-chat-models | 0.00000025 |
|
||||
| code-gecko@latest | vertex_ai-chat-models | 0.00000025 |
|
||||
| codechat-bison | vertex_ai-code-chat-models | 0.00000025 |
|
||||
| codechat-bison@001 | vertex_ai-code-chat-models | 0.00000025 |
|
||||
| codechat-bison-32k | vertex_ai-code-chat-models | 0.00000025 |
|
||||
| palm/chat-bison | palm | 0.00000025 |
|
||||
| palm/chat-bison-001 | palm | 0.00000025 |
|
||||
| palm/text-bison | palm | 0.00000025 |
|
||||
| palm/text-bison-001 | palm | 0.00000025 |
|
||||
| palm/text-bison-safety-off | palm | 0.00000025 |
|
||||
| palm/text-bison-safety-recitation-off | palm | 0.00000025 |
|
||||
| anyscale/meta-llama/Llama-2-7b-chat-hf | anyscale | 0.0000003 |
|
||||
| anyscale/mistralai/Mistral-7B-Instruct-v0.1 | anyscale | 0.0000003 |
|
||||
| openrouter/meta-llama/llama-2-13b-chat | openrouter | 0.0000004 |
|
||||
| openrouter/nousresearch/nous-hermes-llama2-13b | openrouter | 0.0000004 |
|
||||
| deepinfra/meta-llama/Llama-2-7b-chat-hf | deepinfra | 0.0000004 |
|
||||
| deepinfra/mistralai/Mistral-7B-Instruct-v0.1 | deepinfra | 0.0000004 |
|
||||
| anyscale/meta-llama/Llama-2-13b-chat-hf | anyscale | 0.0000005 |
|
||||
| amazon.titan-text-lite-v1 | bedrock | 0.0000007 |
|
||||
| deepinfra/meta-llama/Llama-2-13b-chat-hf | deepinfra | 0.0000007 |
|
||||
| text-babbage-001 | text-completion-openai | 0.0000008 |
|
||||
| text-ada-001 | text-completion-openai | 0.0000008 |
|
||||
| babbage-002 | text-completion-openai | 0.0000008 |
|
||||
| openrouter/google/palm-2-chat-bison | openrouter | 0.000001 |
|
||||
| openrouter/google/palm-2-codechat-bison | openrouter | 0.000001 |
|
||||
| openrouter/meta-llama/codellama-34b-instruct | openrouter | 0.000001 |
|
||||
| deepinfra/codellama/CodeLlama-34b-Instruct-hf | deepinfra | 0.0000012 |
|
||||
| deepinfra/meta-llama/Llama-2-70b-chat-hf | deepinfra | 0.0000016499999999999999 |
|
||||
| deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1 | deepinfra | 0.0000016499999999999999 |
|
||||
| anyscale/meta-llama/Llama-2-70b-chat-hf | anyscale | 0.000002 |
|
||||
| anyscale/codellama/CodeLlama-34b-Instruct-hf | anyscale | 0.000002 |
|
||||
| gpt-3.5-turbo-1106 | openai | 0.000003 |
|
||||
| openrouter/meta-llama/llama-2-70b-chat | openrouter | 0.000003 |
|
||||
| amazon.titan-text-express-v1 | bedrock | 0.000003 |
|
||||
| gpt-3.5-turbo | openai | 0.0000035 |
|
||||
| gpt-3.5-turbo-0301 | openai | 0.0000035 |
|
||||
| gpt-3.5-turbo-0613 | openai | 0.0000035 |
|
||||
| gpt-3.5-turbo-instruct | text-completion-openai | 0.0000035 |
|
||||
| openrouter/openai/gpt-3.5-turbo | openrouter | 0.0000035 |
|
||||
| cohere.command-text-v14 | bedrock | 0.0000035 |
|
||||
| claude-instant-1 | anthropic | 0.00000714 |
|
||||
| claude-instant-1.2 | anthropic | 0.00000714 |
|
||||
| openrouter/anthropic/claude-instant-v1 | openrouter | 0.00000714 |
|
||||
| anthropic.claude-instant-v1 | bedrock | 0.00000714 |
|
||||
| openrouter/mancer/weaver | openrouter | 0.00001125 |
|
||||
| j2-mid | ai21 | 0.00002 |
|
||||
| ai21.j2-mid-v1 | bedrock | 0.000025 |
|
||||
| openrouter/jondurbin/airoboros-l2-70b-2.1 | openrouter | 0.00002775 |
|
||||
| command-nightly | cohere | 0.00003 |
|
||||
| command | cohere | 0.00003 |
|
||||
| command-light | cohere | 0.00003 |
|
||||
| command-medium-beta | cohere | 0.00003 |
|
||||
| command-xlarge-beta | cohere | 0.00003 |
|
||||
| command-r-plus | cohere | 0.000018 |
|
||||
| j2-ultra | ai21 | 0.00003 |
|
||||
| ai21.j2-ultra-v1 | bedrock | 0.0000376 |
|
||||
| gpt-4-1106-preview | openai | 0.00004 |
|
||||
| gpt-4-vision-preview | openai | 0.00004 |
|
||||
| claude-2 | anthropic | 0.0000437 |
|
||||
| openrouter/anthropic/claude-2 | openrouter | 0.0000437 |
|
||||
| anthropic.claude-v1 | bedrock | 0.0000437 |
|
||||
| anthropic.claude-v2 | bedrock | 0.0000437 |
|
||||
| gpt-4 | openai | 0.00009 |
|
||||
| gpt-4-0314 | openai | 0.00009 |
|
||||
| gpt-4-0613 | openai | 0.00009 |
|
||||
| openrouter/openai/gpt-4 | openrouter | 0.00009 |
|
||||
| gpt-4-32k | openai | 0.00018 |
|
||||
| gpt-4-32k-0314 | openai | 0.00018 |
|
||||
| gpt-4-32k-0613 | openai | 0.00018 |
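The table lists static per-token rates; when the benchmark runs, the actual cost of each call is computed from the response with `completion_cost`. A minimal sketch (the model and question below are just examples):

```python
import os
from litellm import completion, completion_cost

os.environ["OPENAI_API_KEY"] = ""  # set your key

# one benchmark call: get a response, then price it
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "When will BerriAI IPO?"}],
)
cost = completion_cost(completion_response=response)  # input + output tokens priced for this model
print(f"${cost:.6f}")
```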
|
||||
|
||||
|
||||
|
||||
## Setup:
|
||||
```
|
||||
git clone https://github.com/BerriAI/litellm
|
||||
```
|
||||
cd to `benchmark` dir
|
||||
```
|
||||
cd litellm/cookbook/benchmark
|
||||
```
|
||||
|
||||
### Install Dependencies
|
||||
```
|
||||
pip install litellm click tqdm tabulate termcolor
|
||||
```
|
||||
|
||||
### Configuration
|
||||
In `benchmark/benchmark.py`, select your LLMs, set your LLM API keys, and list your questions
|
||||
|
||||
Supported LLMs: https://docs.litellm.ai/docs/providers
|
||||
|
||||
```python
|
||||
# Define the list of models to benchmark
|
||||
models = ['gpt-3.5-turbo', 'togethercomputer/llama-2-70b-chat', 'claude-2']
|
||||
|
||||
# Enter LLM API keys
|
||||
os.environ['OPENAI_API_KEY'] = ""
|
||||
os.environ['ANTHROPIC_API_KEY'] = ""
|
||||
os.environ['TOGETHERAI_API_KEY'] = ""
|
||||
|
||||
# List of questions to benchmark (replace with your questions)
|
||||
questions = [
|
||||
"When will BerriAI IPO?",
|
||||
"When will LiteLLM hit $100M ARR?"
|
||||
]
|
||||
|
||||
```
|
||||
|
||||
## Run LLM-Bench
|
||||
```
|
||||
python3 benchmark.py
|
||||
```
|
||||
|
||||
## Expected Output
|
||||
```
|
||||
Running question: When will BerriAI IPO? for model: claude-2: 100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:13<00:00, 4.41s/it]
|
||||
|
||||
Benchmark Results for 'When will BerriAI IPO?':
|
||||
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
|
||||
| Model | Response | Response Time (seconds) | Cost ($) |
|
||||
+=================+==================================================================================+===========================+============+
|
||||
| gpt-3.5-turbo | As an AI language model, I cannot provide up-to-date information or predict | 1.55 seconds | $0.000122 |
|
||||
| | future events. It is best to consult a reliable financial source or contact | | |
|
||||
| | BerriAI directly for information regarding their IPO plans. | | |
|
||||
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
|
||||
| togethercompute | I'm not able to provide information about future IPO plans or dates for BerriAI | 8.52 seconds | $0.000531 |
|
||||
| r/llama-2-70b-c | or any other company. IPO (Initial Public Offering) plans and timelines are | | |
|
||||
| hat | typically kept private by companies until they are ready to make a public | | |
|
||||
| | announcement. It's important to note that IPO plans can change and are subject | | |
|
||||
| | to various factors, such as market conditions, financial performance, and | | |
|
||||
| | regulatory approvals. Therefore, it's difficult to predict with certainty when | | |
|
||||
| | BerriAI or any other company will go public. If you're interested in staying | | |
|
||||
| | up-to-date with BerriAI's latest news and developments, you may want to follow | | |
|
||||
| | their official social media accounts, subscribe to their newsletter, or visit | | |
|
||||
| | their website periodically for updates. | | |
|
||||
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
|
||||
| claude-2 | I do not have any information about when or if BerriAI will have an initial | 3.17 seconds | $0.002084 |
|
||||
| | public offering (IPO). As an AI assistant created by Anthropic to be helpful, | | |
|
||||
| | harmless, and honest, I do not have insider knowledge about Anthropic's business | | |
|
||||
| | plans or strategies. | | |
|
||||
+-----------------+----------------------------------------------------------------------------------+---------------------------+------------+
|
||||
```
|
||||
|
||||
## Support
|
||||
**🤝 Schedule a 1-on-1 Session:** Book a [1-on-1 session](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) with Krrish and Ishaan, the founders, to discuss any issues, provide feedback, or explore how we can improve LiteLLM for you.
|
@@ -1,154 +0,0 @@
|
||||
# CodeLlama Server: Streaming, Caching, Model Fallbacks (OpenAI + Anthropic), Prompt-tracking
|
||||
|
||||
Works with: Anthropic, Huggingface, Cohere, TogetherAI, Azure, OpenAI, etc.
|
||||
|
||||
[](https://pypi.org/project/litellm/)
|
||||
[](https://pypi.org/project/litellm/0.1.1/)
|
||||

|
||||
|
||||
[](https://railway.app/template/HuDPw-?referralCode=jch2ME)
|
||||
|
||||
**LIVE DEMO** - https://litellm.ai/playground
|
||||
|
||||
## What does CodeLlama Server do
|
||||
|
||||
- Uses Together AI's CodeLlama to answer coding questions, with GPT-4 + Claude-2 as backups (you can easily switch this to any model from Huggingface, Replicate, Cohere, AI21, Azure, OpenAI, etc.)
|
||||
- Sets default system prompt for guardrails `system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."`
|
||||
- Integrates with Promptlayer for model + prompt tracking
|
||||
- Example output
|
||||
|
||||
<img src="imgs/code-output.png" alt="Code Output" width="600"/>
|
||||
|
||||
- **Consistent Input/Output** Format
|
||||
- Call all models using the OpenAI format - `completion(model, messages)`
|
||||
- Text responses will always be available at `['choices'][0]['message']['content']`
|
||||
- Stream responses will always be available at `['choices'][0]['delta']['content']`
|
||||
- **Error Handling** - Uses model fallbacks (if `CodeLlama` fails, try `GPT-4`) with cooldowns and retries
|
||||
- **Prompt Logging** - Log successful completions to PromptLayer for testing + iterating on your prompts in production! (Learn more: https://litellm.readthedocs.io/en/latest/advanced/)
|
||||
|
||||
**Example: Logs sent to PromptLayer**
|
||||
|
||||
<img src="imgs/promptlayer_logging.png" alt="Prompt Logging" width="900"/>
|
||||
|
||||
|
||||
- **Token Usage & Spend** - Track Input + Completion tokens used + Spend/model - https://docs.litellm.ai/docs/token_usage
|
||||
- **Caching** - Provides in-memory cache + GPT-Cache integration for more advanced usage - https://docs.litellm.ai/docs/caching/gpt_cache
|
||||
|
||||
- **Streaming & Async Support** - Return generators to stream text responses - TEST IT 👉 https://litellm.ai/
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `/chat/completions` (POST)
|
||||
|
||||
This endpoint is used to generate chat completions for 50+ supported LLM API models, e.g. Llama2, GPT-4, Claude-2, etc.
|
||||
|
||||
#### Input
|
||||
|
||||
This API endpoint accepts all inputs as raw JSON and expects the following fields
|
||||
|
||||
- `prompt` (string, required): The user's coding-related question
|
||||
- Additional Optional parameters: `temperature`, `functions`, `function_call`, `top_p`, `n`, `stream`. See the full list of supported inputs here: https://litellm.readthedocs.io/en/latest/input/
|
||||
|
||||
#### Example JSON body
|
||||
|
||||
For claude-2
|
||||
|
||||
```json
|
||||
{
|
||||
"prompt": "write me a function to print hello world"
|
||||
}
|
||||
```
|
||||
|
||||
### Making an API request to the Code-Gen Server
|
||||
|
||||
```python
|
||||
import requests
|
||||
import json
|
||||
|
||||
url = "localhost:4000/chat/completions"
|
||||
|
||||
payload = json.dumps({
|
||||
"prompt": "write me a function to print hello world"
|
||||
})
|
||||
headers = {
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
response = requests.request("POST", url, headers=headers, data=payload)
|
||||
|
||||
print(response.text)
|
||||
|
||||
```
|
||||
|
||||
### Output [Response Format]
|
||||
|
||||
All responses from the server are returned in the following format (for all LLM models). More info on output here: https://litellm.readthedocs.io/en/latest/output/
|
||||
|
||||
```json
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"content": ".\n\n```\ndef print_hello_world():\n print(\"hello world\")\n",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1693279694.6474009,
|
||||
"model": "togethercomputer/CodeLlama-34b-Instruct",
|
||||
"usage": {
|
||||
"completion_tokens": 14,
|
||||
"prompt_tokens": 28,
|
||||
"total_tokens": 42
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Installation & Usage
|
||||
|
||||
### Running Locally
|
||||
|
||||
1. Clone liteLLM repository to your local machine:
|
||||
```
|
||||
git clone https://github.com/BerriAI/litellm-CodeLlama-server
|
||||
```
|
||||
2. Install the required dependencies using pip
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
3. Set your LLM API keys
|
||||
```
|
||||
os.environ['OPENAI_API_KEY'] = "YOUR_API_KEY"
|
||||
or
|
||||
set OPENAI_API_KEY in your .env file
|
||||
```
|
||||
4. Run the server:
|
||||
```
|
||||
python main.py
|
||||
```
|
||||
|
||||
## Deploying
|
||||
|
||||
1. Quick Start: Deploy on Railway
|
||||
|
||||
[](https://railway.app/template/HuDPw-?referralCode=jch2ME)
|
||||
|
||||
2. `GCP`, `AWS`, `Azure`
|
||||
This project includes a `Dockerfile`, so you can build the image yourself and deploy it to any of these providers.
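For example, building and running the container locally might look like this (a sketch: the image tag is arbitrary, and `TOGETHER_AI_API_KEY` is the provider key referenced in `main.py`; the server listens on port 4000):

```
docker build -t codellama-server .
docker run -p 4000:4000 -e TOGETHER_AI_API_KEY=<your-together-ai-key> codellama-server
```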
|
||||
|
||||
# Support / Talk with founders
|
||||
|
||||
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
|
||||
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
|
||||
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
|
||||
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
|
||||
|
||||
## Roadmap
|
||||
|
||||
- [ ] Implement user-based rate-limiting
|
||||
- [ ] Spending controls per project - expose key creation endpoint
|
||||
- [ ] Need to store a keys db -> mapping created keys to their alias (i.e. project name)
|
||||
- [ ] Easily add new models as backups / as the entry-point (add this to the available model list)
|
Binary file not shown.
Before Width: | Height: | Size: 232 KiB |
Binary file not shown.
Before Width: | Height: | Size: 293 KiB |
@@ -1,102 +0,0 @@
|
||||
import traceback
|
||||
from flask import Flask, request, Response
|
||||
from flask_cors import CORS
|
||||
import litellm
|
||||
from util import handle_error
|
||||
from litellm import completion
|
||||
import os
|
||||
import dotenv
|
||||
import time
|
||||
import json
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# TODO: set your keys in .env or here:
|
||||
# os.environ["OPENAI_API_KEY"] = "" # set your openai key here
|
||||
# os.environ["ANTHROPIC_API_KEY"] = "" # set your anthropic key here
|
||||
# os.environ["TOGETHER_AI_API_KEY"] = "" # set your together ai key here
|
||||
# see supported models / keys here: https://litellm.readthedocs.io/en/latest/supported/
|
||||
######### ENVIRONMENT VARIABLES ##########
|
||||
verbose = True
|
||||
|
||||
# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
|
||||
######### PROMPT LOGGING ##########
|
||||
os.environ["PROMPTLAYER_API_KEY"] = (
|
||||
"" # set your promptlayer key here - https://promptlayer.com/
|
||||
)
|
||||
|
||||
# set callbacks
|
||||
litellm.success_callback = ["promptlayer"]
|
||||
############ HELPER FUNCTIONS ###################################
|
||||
|
||||
|
||||
def print_verbose(print_statement):
|
||||
if verbose:
|
||||
print(print_statement)
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
return "received!", 200
|
||||
|
||||
|
||||
def data_generator(response):
|
||||
for chunk in response:
|
||||
yield f"data: {json.dumps(chunk)}\n\n"
|
||||
|
||||
|
||||
@app.route("/chat/completions", methods=["POST"])
|
||||
def api_completion():
|
||||
data = request.json
|
||||
start_time = time.time()
|
||||
if data.get("stream") == "True":
|
||||
data["stream"] = True # convert to boolean
|
||||
try:
|
||||
if "prompt" not in data:
|
||||
raise ValueError("data needs to have prompt")
|
||||
data["model"] = (
|
||||
"togethercomputer/CodeLlama-34b-Instruct" # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
|
||||
)
|
||||
# COMPLETION CALL
|
||||
system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": data.pop("prompt")},
|
||||
]
|
||||
data["messages"] = messages
|
||||
print(f"data: {data}")
|
||||
response = completion(**data)
|
||||
## LOG SUCCESS
|
||||
end_time = time.time()
|
||||
if (
|
||||
"stream" in data and data["stream"] == True
|
||||
): # use generate_responses to stream responses
|
||||
return Response(data_generator(response), mimetype="text/event-stream")
|
||||
except Exception:
|
||||
# call handle_error function
|
||||
print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
|
||||
## LOG FAILURE
|
||||
end_time = time.time()
|
||||
traceback_exception = traceback.format_exc()
|
||||
return handle_error(data=data)
|
||||
return response
|
||||
|
||||
|
||||
@app.route("/get_models", methods=["POST"])
|
||||
def get_models():
|
||||
try:
|
||||
return litellm.model_list
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
response = {"error": str(e)}
|
||||
return response, 200
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from waitress import serve
|
||||
|
||||
serve(app, host="0.0.0.0", port=4000, threads=500)
|
@@ -1,89 +0,0 @@
|
||||
import requests
|
||||
|
||||
|
||||
def get_next_url(response):
|
||||
"""
|
||||
Function to get 'next' url from Link header
|
||||
:param response: response from requests
|
||||
:return: next url or None
|
||||
"""
|
||||
if "link" not in response.headers:
|
||||
return None
|
||||
headers = response.headers
|
||||
|
||||
next_url = headers["Link"]
|
||||
print(next_url)
|
||||
start_index = next_url.find("<")
|
||||
end_index = next_url.find(">")
|
||||
|
||||
return next_url[start_index + 1:end_index]
|
||||
|
||||
|
||||
def get_models(url):
|
||||
"""
|
||||
Function to retrieve all models from paginated endpoint
|
||||
:param url: base url to make GET request
|
||||
:return: list of all models
|
||||
"""
|
||||
models = []
|
||||
while url:
|
||||
response = requests.get(url)
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to retrieve data. Status code: {response.status_code}")
|
||||
return models
|
||||
payload = response.json()
|
||||
url = get_next_url(response)
|
||||
models.extend(payload)
|
||||
return models
|
||||
|
||||
|
||||
def get_cleaned_models(models):
|
||||
"""
|
||||
Function to clean retrieved models
|
||||
:param models: list of retrieved models
|
||||
:return: list of cleaned models
|
||||
"""
|
||||
cleaned_models = []
|
||||
for model in models:
|
||||
cleaned_models.append(model["id"])
|
||||
return cleaned_models
|
||||
|
||||
|
||||
# Get text-generation models
|
||||
url = "https://huggingface.co/api/models?filter=text-generation-inference"
|
||||
text_generation_models = get_models(url)
|
||||
cleaned_text_generation_models = get_cleaned_models(text_generation_models)
|
||||
|
||||
print(cleaned_text_generation_models)
|
||||
|
||||
|
||||
# Get conversational models
|
||||
url = "https://huggingface.co/api/models?filter=conversational"
|
||||
conversational_models = get_models(url)
|
||||
cleaned_conversational_models = get_cleaned_models(conversational_models)
|
||||
|
||||
print(cleaned_conversational_models)
|
||||
|
||||
|
||||
def write_to_txt(cleaned_models, filename):
|
||||
"""
|
||||
Function to write the contents of a list to a text file
|
||||
:param cleaned_models: list of cleaned models
|
||||
:param filename: name of the text file
|
||||
"""
|
||||
with open(filename, "w") as f:
|
||||
for item in cleaned_models:
|
||||
f.write("%s\n" % item)
|
||||
|
||||
|
||||
# Write contents of cleaned_text_generation_models to text_generation_models.txt
|
||||
write_to_txt(
|
||||
cleaned_text_generation_models,
|
||||
"huggingface_llms_metadata/hf_text_generation_models.txt",
|
||||
)
|
||||
|
||||
# Write contents of cleaned_conversational_models to conversational_models.txt
|
||||
write_to_txt(
|
||||
cleaned_conversational_models,
|
||||
"huggingface_llms_metadata/hf_conversational_models.txt",
|
||||
)
|
@@ -1,93 +0,0 @@
|
||||
{
|
||||
"gpt-3.5-turbo": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002
|
||||
},
|
||||
"gpt-3.5-turbo-0613": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002
|
||||
},
|
||||
"gpt-3.5-turbo-0301": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002
|
||||
},
|
||||
"gpt-3.5-turbo-16k": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004
|
||||
},
|
||||
"gpt-3.5-turbo-16k-0613": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004
|
||||
},
|
||||
"gpt-4": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.00006
|
||||
},
|
||||
"gpt-4-0613": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.00006
|
||||
},
|
||||
"gpt-4-32k": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.00006,
|
||||
"output_cost_per_token": 0.00012
|
||||
},
|
||||
"claude-instant-1": {
|
||||
"max_tokens": 100000,
|
||||
"input_cost_per_token": 0.00000163,
|
||||
"output_cost_per_token": 0.00000551
|
||||
},
|
||||
"claude-2": {
|
||||
"max_tokens": 100000,
|
||||
"input_cost_per_token": 0.00001102,
|
||||
"output_cost_per_token": 0.00003268
|
||||
},
|
||||
"text-bison-001": {
|
||||
"max_tokens": 8192,
|
||||
"input_cost_per_token": 0.000004,
|
||||
"output_cost_per_token": 0.000004
|
||||
},
|
||||
"chat-bison-001": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000002
|
||||
},
|
||||
"command-nightly": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.000015,
|
||||
"output_cost_per_token": 0.000015
|
||||
},
|
||||
"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000608,
|
||||
"output_cost_per_token": 0.00000608
|
||||
},
|
||||
"together-ai-up-to-3b": {
|
||||
"input_cost_per_token": 0.0000001,
|
||||
"output_cost_per_token": 0.0000001
|
||||
},
|
||||
"together-ai-3.1b-7b": {
|
||||
"input_cost_per_token": 0.0000002,
|
||||
"output_cost_per_token": 0.0000002
|
||||
},
|
||||
"together-ai-7.1b-20b": {
|
||||
"max_tokens": 1000,
|
||||
"input_cost_per_token": 0.0000004,
|
||||
"output_cost_per_token": 0.0000004
|
||||
},
|
||||
"together-ai-20.1b-40b": {
|
||||
"input_cost_per_token": 0.000001,
|
||||
"output_cost_per_token": 0.000001
|
||||
},
|
||||
"together-ai-40.1b-70b": {
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000003
|
||||
}
|
||||
}
|
@@ -1,412 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7aa8875d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google ADK with LiteLLM\n",
|
||||
"\n",
|
||||
"Use Google ADK with LiteLLM Python SDK, LiteLLM Proxy.\n",
|
||||
"\n",
|
||||
"This tutorial shows you how to create intelligent agents using Agent Development Kit (ADK) with support for multiple Large Language Model (LLM) providers through LiteLLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4d249c3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"ADK (Agent Development Kit) allows you to build intelligent agents powered by LLMs. By integrating with LiteLLM, you can:\n",
|
||||
"\n",
|
||||
"- Use multiple LLM providers (OpenAI, Anthropic, Google, etc.)\n",
|
||||
"- Switch easily between models from different providers\n",
|
||||
"- Connect to a LiteLLM proxy for centralized model management"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0bbb56b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"- Python environment setup\n",
|
||||
"- API keys for model providers (OpenAI, Anthropic, Google AI Studio)\n",
|
||||
"- Basic understanding of LLMs and agent concepts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7fee50a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "44106a23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install dependencies\n",
|
||||
"!pip install google-adk litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2171740a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Setting Up Environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6695807e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup environment and API keys\n",
|
||||
"import os\n",
|
||||
"import asyncio\n",
|
||||
"from google.adk.agents import Agent\n",
|
||||
"from google.adk.models.lite_llm import LiteLlm # For multi-model support\n",
|
||||
"from google.adk.sessions import InMemorySessionService\n",
|
||||
"from google.adk.runners import Runner\n",
|
||||
"from google.genai import types\n",
|
||||
"import litellm # Import for proxy configuration\n",
|
||||
"\n",
|
||||
"# Set your API keys\n",
|
||||
"os.environ['GOOGLE_API_KEY'] = 'your-google-api-key' # For Gemini models\n",
|
||||
"os.environ['OPENAI_API_KEY'] = 'your-openai-api-key' # For OpenAI models\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = 'your-anthropic-api-key' # For Claude models\n",
|
||||
"\n",
|
||||
"# Define model constants for cleaner code\n",
|
||||
"MODEL_GEMINI_PRO = 'gemini-1.5-pro'\n",
|
||||
"MODEL_GPT_4O = 'openai/gpt-4o'\n",
|
||||
"MODEL_CLAUDE_SONNET = 'anthropic/claude-3-sonnet-20240229'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2b1ed59",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Define a Simple Tool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "04b3ef5b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Weather tool implementation\n",
|
||||
"def get_weather(city: str) -> dict:\n",
|
||||
" \"\"\"Retrieves the current weather report for a specified city.\"\"\"\n",
|
||||
" print(f'Tool: get_weather called for city: {city}')\n",
|
||||
"\n",
|
||||
" # Mock weather data\n",
|
||||
" mock_weather_db = {\n",
|
||||
" 'newyork': {\n",
|
||||
" 'status': 'success',\n",
|
||||
" 'report': 'The weather in New York is sunny with a temperature of 25°C.'\n",
|
||||
" },\n",
|
||||
" 'london': {\n",
|
||||
" 'status': 'success',\n",
|
||||
" 'report': \"It's cloudy in London with a temperature of 15°C.\"\n",
|
||||
" },\n",
|
||||
" 'tokyo': {\n",
|
||||
" 'status': 'success',\n",
|
||||
" 'report': 'Tokyo is experiencing light rain and a temperature of 18°C.'\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" city_normalized = city.lower().replace(' ', '')\n",
|
||||
"\n",
|
||||
" if city_normalized in mock_weather_db:\n",
|
||||
" return mock_weather_db[city_normalized]\n",
|
||||
" else:\n",
|
||||
" return {\n",
|
||||
" 'status': 'error',\n",
|
||||
" 'error_message': f\"Sorry, I don't have weather information for '{city}'.\"\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "727b15c9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Helper Function for Agent Interaction"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f77449bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Agent interaction helper function\n",
|
||||
"async def call_agent_async(query: str, runner, user_id, session_id):\n",
|
||||
" \"\"\"Sends a query to the agent and prints the final response.\"\"\"\n",
|
||||
" print(f'\\n>>> User Query: {query}')\n",
|
||||
"\n",
|
||||
" content = types.Content(role='user', parts=[types.Part(text=query)])\n",
|
||||
" final_response_text = 'Agent did not produce a final response.'\n",
|
||||
"\n",
|
||||
" async for event in runner.run_async(\n",
|
||||
" user_id=user_id,\n",
|
||||
" session_id=session_id,\n",
|
||||
" new_message=content\n",
|
||||
" ):\n",
|
||||
" if event.is_final_response():\n",
|
||||
" if event.content and event.content.parts:\n",
|
||||
" final_response_text = event.content.parts[0].text\n",
|
||||
" break\n",
|
||||
" print(f'<<< Agent Response: {final_response_text}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ac87987",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Using Different Model Providers with ADK\n",
|
||||
"\n",
|
||||
"### 4.1 Using OpenAI Models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e167d557",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# OpenAI model implementation\n",
|
||||
"weather_agent_gpt = Agent(\n",
|
||||
" name='weather_agent_gpt',\n",
|
||||
" model=LiteLlm(model=MODEL_GPT_4O),\n",
|
||||
" description='Provides weather information using OpenAI\\'s GPT.',\n",
|
||||
" instruction=(\n",
|
||||
" 'You are a helpful weather assistant powered by GPT-4o. '\n",
|
||||
" \"Use the 'get_weather' tool for city weather requests. \"\n",
|
||||
" 'Present information clearly.'\n",
|
||||
" ),\n",
|
||||
" tools=[get_weather],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"session_service_gpt = InMemorySessionService()\n",
|
||||
"session_gpt = session_service_gpt.create_session(\n",
|
||||
" app_name='weather_app', user_id='user_1', session_id='session_gpt'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"runner_gpt = Runner(\n",
|
||||
" agent=weather_agent_gpt,\n",
|
||||
" app_name='weather_app',\n",
|
||||
" session_service=session_service_gpt,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"async def test_gpt_agent():\n",
|
||||
" print('\\n--- Testing GPT Agent ---')\n",
|
||||
" await call_agent_async(\n",
|
||||
" \"What's the weather in London?\",\n",
|
||||
" runner=runner_gpt,\n",
|
||||
" user_id='user_1',\n",
|
||||
" session_id='session_gpt',\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# To execute in a notebook cell:\n",
|
||||
"# await test_gpt_agent()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f9cb0613",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 4.2 Using Anthropic Models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1c653665",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Anthropic model implementation\n",
|
||||
"weather_agent_claude = Agent(\n",
|
||||
" name='weather_agent_claude',\n",
|
||||
" model=LiteLlm(model=MODEL_CLAUDE_SONNET),\n",
|
||||
" description='Provides weather information using Anthropic\\'s Claude.',\n",
|
||||
" instruction=(\n",
|
||||
" 'You are a helpful weather assistant powered by Claude Sonnet. '\n",
|
||||
" \"Use the 'get_weather' tool for city weather requests. \"\n",
|
||||
" 'Present information clearly.'\n",
|
||||
" ),\n",
|
||||
" tools=[get_weather],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"session_service_claude = InMemorySessionService()\n",
|
||||
"session_claude = session_service_claude.create_session(\n",
|
||||
" app_name='weather_app', user_id='user_1', session_id='session_claude'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"runner_claude = Runner(\n",
|
||||
" agent=weather_agent_claude,\n",
|
||||
" app_name='weather_app',\n",
|
||||
" session_service=session_service_claude,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"async def test_claude_agent():\n",
|
||||
" print('\\n--- Testing Claude Agent ---')\n",
|
||||
" await call_agent_async(\n",
|
||||
" \"What's the weather in Tokyo?\",\n",
|
||||
" runner=runner_claude,\n",
|
||||
" user_id='user_1',\n",
|
||||
" session_id='session_claude',\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# To execute in a notebook cell:\n",
|
||||
"# await test_claude_agent()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bf9d863b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 4.3 Using Google's Gemini Models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "83f49d0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Gemini model implementation\n",
|
||||
"weather_agent_gemini = Agent(\n",
|
||||
" name='weather_agent_gemini',\n",
|
||||
" model=MODEL_GEMINI_PRO,\n",
|
||||
" description='Provides weather information using Google\\'s Gemini.',\n",
|
||||
" instruction=(\n",
|
||||
" 'You are a helpful weather assistant powered by Gemini Pro. '\n",
|
||||
" \"Use the 'get_weather' tool for city weather requests. \"\n",
|
||||
" 'Present information clearly.'\n",
|
||||
" ),\n",
|
||||
" tools=[get_weather],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"session_service_gemini = InMemorySessionService()\n",
|
||||
"session_gemini = session_service_gemini.create_session(\n",
|
||||
" app_name='weather_app', user_id='user_1', session_id='session_gemini'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"runner_gemini = Runner(\n",
|
||||
" agent=weather_agent_gemini,\n",
|
||||
" app_name='weather_app',\n",
|
||||
" session_service=session_service_gemini,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"async def test_gemini_agent():\n",
|
||||
" print('\\n--- Testing Gemini Agent ---')\n",
|
||||
" await call_agent_async(\n",
|
||||
" \"What's the weather in New York?\",\n",
|
||||
" runner=runner_gemini,\n",
|
||||
" user_id='user_1',\n",
|
||||
" session_id='session_gemini',\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# To execute in a notebook cell:\n",
|
||||
"# await test_gemini_agent()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93bc5fd0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Using LiteLLM Proxy with ADK"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b4275151",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"| Variable | Description |\n",
|
||||
"|----------|-------------|\n",
|
||||
"| `LITELLM_PROXY_API_KEY` | The API key for the LiteLLM proxy |\n",
|
||||
"| `LITELLM_PROXY_API_BASE` | The base URL for the LiteLLM proxy |\n",
|
||||
"| `USE_LITELLM_PROXY` or `litellm.use_litellm_proxy` | When set to True, your request will be sent to LiteLLM proxy. |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "256530a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LiteLLM proxy integration\n",
|
||||
"os.environ['LITELLM_PROXY_API_KEY'] = 'your-litellm-proxy-api-key'\n",
|
||||
"os.environ['LITELLM_PROXY_API_BASE'] = 'your-litellm-proxy-url' # e.g., 'http://localhost:4000'\n",
|
||||
"litellm.use_litellm_proxy = True\n",
|
||||
"\n",
|
||||
"weather_agent_proxy_env = Agent(\n",
|
||||
" name='weather_agent_proxy_env',\n",
|
||||
" model=LiteLlm(model='gpt-4o'),\n",
|
||||
" description='Provides weather information using a model from LiteLLM proxy.',\n",
|
||||
" instruction=(\n",
|
||||
" 'You are a helpful weather assistant. '\n",
|
||||
" \"Use the 'get_weather' tool for city weather requests. \"\n",
|
||||
" 'Present information clearly.'\n",
|
||||
" ),\n",
|
||||
" tools=[get_weather],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"session_service_proxy_env = InMemorySessionService()\n",
|
||||
"session_proxy_env = session_service_proxy_env.create_session(\n",
|
||||
" app_name='weather_app', user_id='user_1', session_id='session_proxy_env'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"runner_proxy_env = Runner(\n",
|
||||
" agent=weather_agent_proxy_env,\n",
|
||||
" app_name='weather_app',\n",
|
||||
" session_service=session_service_proxy_env,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"async def test_proxy_env_agent():\n",
|
||||
" print('\\n--- Testing Proxy-enabled Agent (Environment Variables) ---')\n",
|
||||
" await call_agent_async(\n",
|
||||
" \"What's the weather in London?\",\n",
|
||||
" runner=runner_proxy_env,\n",
|
||||
" user_id='user_1',\n",
|
||||
" session_id='session_proxy_env',\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# To execute in a notebook cell:\n",
|
||||
"# await test_proxy_env_agent()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
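
The proxy cells above route requests by setting `litellm.use_litellm_proxy = True` inside the notebook. As a minimal sketch of the alternative the variables table mentions, the `USE_LITELLM_PROXY` environment variable can be set instead, and the async test helpers can be driven from a plain script with `asyncio.run`. The `call_agent_async` helper and `runner_proxy_env` object are assumed to be the ones defined in the cells above; the script layout is illustrative, not part of the original notebook.

```python
# Sketch: running the proxy-backed ADK agent outside a notebook.
# Assumes the cells above have been collected into a script/module, so that
# call_agent_async and runner_proxy_env are already defined in scope.
import asyncio
import os

# Alternative to `litellm.use_litellm_proxy = True`: per the table above, the
# USE_LITELLM_PROXY environment variable also routes requests to the proxy.
os.environ["USE_LITELLM_PROXY"] = "True"
os.environ["LITELLM_PROXY_API_KEY"] = "your-litellm-proxy-api-key"
os.environ["LITELLM_PROXY_API_BASE"] = "http://localhost:4000"

async def main():
    # Same call the notebook makes with `await test_proxy_env_agent()`
    await call_agent_async(
        "What's the weather in London?",
        runner=runner_proxy_env,
        user_id="user_1",
        session_id="session_proxy_env",
    )

if __name__ == "__main__":
    # Scripts need an explicit event loop; notebook cells can simply `await`.
    asyncio.run(main())
```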
|
@@ -1,251 +0,0 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# LiteLLM A121 Tutorial\n",
|
||||
"\n",
|
||||
"This walks through using A121 Jurassic models\n",
|
||||
"* j2-light\n",
|
||||
"* j2-mid\n",
|
||||
"* j2-ultra"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "LeFYo8iqcn5g"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "GslPQFmaZsp-"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"import os"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "P3cKiqURZx7P"
|
||||
},
|
||||
"execution_count": 2,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Set A121 Keys\n",
|
||||
"You can get a free key from https://studio.ai21.com/account/api-key"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "tmTvA1_GaNU4"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"os.environ[\"AI21_API_KEY\"] = \"\""
|
||||
],
|
||||
"metadata": {
|
||||
"id": "_xX8LmxAZ2vp"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# A121 Supported Models:\n",
|
||||
"https://studio.ai21.com/foundation-models"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Fx5ZfJTLbF0A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## J2-light Call"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "H0tl-0Z3bDaL"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
|
||||
"response = completion(model=\"j2-light\", messages=messages)\n",
|
||||
"response"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "DZnApsJUZ_I2",
|
||||
"outputId": "b5707cbe-f67c-47f7-bac5-a7b8af1ba815"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<ModelResponse at 0x7b2c2902e610> JSON: {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" However, I have an important question to ask you\\nMy name is X, and I was wondering if you would be willing to help me.\",\n",
|
||||
" \"role\": \"assistant\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1692761063.5189915,\n",
|
||||
" \"model\": \"j2-light\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": null,\n",
|
||||
" \"completion_tokens\": null,\n",
|
||||
" \"total_tokens\": null\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 6
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# J2-Mid"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "wCcnrYnnbMQA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"messages = [{ \"content\": \"what model are you\",\"role\": \"user\"}]\n",
|
||||
"response = completion(model=\"j2-mid\", messages=messages)\n",
|
||||
"response"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "-5Sxf4blaeEl",
|
||||
"outputId": "6264a5e8-16d6-44a3-e167-9e0c59b6dbc4"
|
||||
},
|
||||
"execution_count": 7,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<ModelResponse at 0x7b2c2902f6a0> JSON: {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \"\\nplease choose the model from the list below\\nModel view in Tekla Structures\",\n",
|
||||
" \"role\": \"assistant\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1692761140.0017524,\n",
|
||||
" \"model\": \"j2-mid\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": null,\n",
|
||||
" \"completion_tokens\": null,\n",
|
||||
" \"total_tokens\": null\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 7
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# J2-Ultra"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "wDARpjxtbUcg"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"messages = [{ \"content\": \"what model are you\",\"role\": \"user\"}]\n",
|
||||
"response = completion(model=\"j2-ultra\", messages=messages)\n",
|
||||
"response"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "i228xwsYbSYo",
|
||||
"outputId": "3765ac56-5a9b-442e-b357-2e346d02e1df"
|
||||
},
|
||||
"execution_count": 8,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<ModelResponse at 0x7b2c28fd4090> JSON: {\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \"\\nI am not a specific model, but I can provide information and assistance based on my training data. Please let me know if there is anything you\",\n",
|
||||
" \"role\": \"assistant\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"created\": 1692761157.8675153,\n",
|
||||
" \"model\": \"j2-ultra\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": null,\n",
|
||||
" \"completion_tokens\": null,\n",
|
||||
" \"total_tokens\": null\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 8
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@@ -1,237 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "gZx-wHJapG5w"
|
||||
},
|
||||
"source": [
|
||||
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
|
||||
"\n",
|
||||
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
|
||||
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
|
||||
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
|
||||
"Example call\n",
|
||||
"```python\n",
|
||||
"model = \"q841o8w\" # baseten model version ID\n",
|
||||
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "4JSRa0QVogPo"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm==0.1.399\n",
|
||||
"!pip install baseten urllib3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "VEukLhDzo4vw"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import completion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "4STYM2OHFNlc"
|
||||
},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {
|
||||
"id": "DorpLxw1FHbC"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
|
||||
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "syF3dTdKFSQQ"
|
||||
},
|
||||
"source": [
|
||||
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
|
||||
"### Pass Your Baseten model `Version ID` as `model`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "rPgSoMlsojz0",
|
||||
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32mINFO\u001b[0m API key set.\n",
|
||||
"INFO:baseten:API key set.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'choices': [{'finish_reason': 'stop',\n",
|
||||
" 'index': 0,\n",
|
||||
" 'message': {'role': 'assistant',\n",
|
||||
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
|
||||
" 'created': 1692135883.699066,\n",
|
||||
" 'model': 'qvv0xeq'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = \"qvv0xeq\"\n",
|
||||
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "7n21UroEGCGa"
|
||||
},
|
||||
"source": [
|
||||
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
|
||||
"### Pass Your Baseten model `Version ID` as `model`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "uLVWFH899lAF",
|
||||
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32mINFO\u001b[0m API key set.\n",
|
||||
"INFO:baseten:API key set.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'choices': [{'finish_reason': 'stop',\n",
|
||||
" 'index': 0,\n",
|
||||
" 'message': {'role': 'assistant',\n",
|
||||
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
|
||||
" 'created': 1692135900.2806294,\n",
|
||||
" 'model': 'q841o8w'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = \"q841o8w\"\n",
|
||||
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "6-TFwmPAGPXq"
|
||||
},
|
||||
"source": [
|
||||
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
|
||||
"### Pass Your Baseten model `Version ID` as `model`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "gbeYZOrUE_Bp",
|
||||
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32mINFO\u001b[0m API key set.\n",
|
||||
"INFO:baseten:API key set.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'choices': [{'finish_reason': 'stop',\n",
|
||||
" 'index': 0,\n",
|
||||
" 'message': {'role': 'assistant',\n",
|
||||
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
|
||||
" 'created': 1692135914.7472186,\n",
|
||||
" 'model': '31dxrj3'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = \"31dxrj3\"\n",
|
||||
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
|
||||
"response"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,411 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "MZ01up0p7wOJ"
|
||||
},
|
||||
"source": [
|
||||
"## 🚅 liteLLM Quick Start Demo\n",
|
||||
"### TLDR: Call 50+ LLM APIs using chatGPT Input/Output format\n",
|
||||
"https://github.com/BerriAI/litellm\n",
|
||||
"\n",
|
||||
"liteLLM is package to simplify calling **OpenAI, Azure, Llama2, Cohere, Anthropic, Huggingface API Endpoints**. LiteLLM manages\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "RZtzCnQS7rW-"
|
||||
},
|
||||
"source": [
|
||||
"## Installation and setting Params"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "rsrN5W-N7L8d"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "ArrWyG5b7QAG"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "bbhJRt34_NJ1"
|
||||
},
|
||||
"source": [
|
||||
"## Set your API keys\n",
|
||||
"- liteLLM reads your .env, env variables or key manager for Auth\n",
|
||||
"\n",
|
||||
"Set keys for the models you want to use below"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"id": "-h8Ga5cR7SvV"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Only set keys for the LLMs you want to use\n",
|
||||
"os.environ['OPENAI_API_KEY'] = \"\" #@param\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = \"\" #@param\n",
|
||||
"os.environ[\"REPLICATE_API_KEY\"] = \"\" #@param\n",
|
||||
"os.environ[\"COHERE_API_KEY\"] = \"\" #@param\n",
|
||||
"os.environ[\"AZURE_API_BASE\"] = \"\" #@param\n",
|
||||
"os.environ[\"AZURE_API_VERSION\"] = \"\" #@param\n",
|
||||
"os.environ[\"AZURE_API_KEY\"] = \"\" #@param"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "fhqpKv6L8fBj"
|
||||
},
|
||||
"source": [
|
||||
"## Call chatGPT"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "speIkoX_8db4",
|
||||
"outputId": "331a6c65-f121-4e65-e121-bf8aaad05d9d"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<OpenAIObject chat.completion id=chatcmpl-820kPkRwSLml4X6165fWbZlEDOedr at 0x12ff93630> JSON: {\n",
|
||||
" \"id\": \"chatcmpl-820kPkRwSLml4X6165fWbZlEDOedr\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1695490221,\n",
|
||||
" \"model\": \"gpt-3.5-turbo-0613\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"I'm sorry, but as an AI text-based model, I don't have real-time information. However, you can check the current weather in San Francisco by searching for \\\"weather in SF\\\" on any search engine or checking a weather website or app.\"\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 13,\n",
|
||||
" \"completion_tokens\": 51,\n",
|
||||
" \"total_tokens\": 64\n",
|
||||
" },\n",
|
||||
" \"response_ms\": 2385.592\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"completion(model=\"gpt-3.5-turbo\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Q3jV1Uxv8zNo"
|
||||
},
|
||||
"source": [
|
||||
"## Call Claude-2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "V8yTWYzY8m9S",
|
||||
"outputId": "8b6dd32d-f9bf-4e89-886d-47cb8020f025"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<ModelResponse chat.completion id=chatcmpl-6d1a40c0-19c0-4bd7-9ca2-a91d8b8c2295 at 0x12ff85a40> JSON: {\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop_sequence\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" Unfortunately I don't have enough context to know the exact location you are asking about when you say \\\"SF\\\". SF could refer to San Francisco, California, or potentially other cities that go by SF as an abbreviation. To get an accurate weather report, it would be helpful if you could provide the full city name and state/country. If you are looking for the weather in San Francisco, California, I would be happy to provide that forecast. Please let me know the specific location you want the weather for.\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-6d1a40c0-19c0-4bd7-9ca2-a91d8b8c2295\",\n",
|
||||
" \"created\": 1695490260.983768,\n",
|
||||
" \"response_ms\": 6351.544,\n",
|
||||
" \"model\": \"claude-2\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 14,\n",
|
||||
" \"completion_tokens\": 102,\n",
|
||||
" \"total_tokens\": 116\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"completion(model=\"claude-2\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "yu0LPDmW9PJa"
|
||||
},
|
||||
"source": [
|
||||
"## Call llama2 on replicate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "0GWV5mtO9Jbu",
|
||||
"outputId": "38538825-b271-406d-a437-f5cf0eb7e548"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<ModelResponse chat.completion id=chatcmpl-3151c2eb-b26f-4c96-89b5-ed1746b219e0 at 0x138b87e50> JSON: {\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" I'm happy to help! However, I must point out that the question \\\"what's the weather in SF\\\" doesn't make sense as \\\"SF\\\" could refer to multiple locations. Could you please clarify which location you are referring to? San Francisco, California or Sioux Falls, South Dakota? Once I have more context, I would be happy to provide you with accurate and reliable information.\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-3151c2eb-b26f-4c96-89b5-ed1746b219e0\",\n",
|
||||
" \"created\": 1695490237.714101,\n",
|
||||
" \"response_ms\": 12109.565,\n",
|
||||
" \"model\": \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 6,\n",
|
||||
" \"completion_tokens\": 78,\n",
|
||||
" \"total_tokens\": 84\n",
|
||||
" },\n",
|
||||
" \"ended\": 1695490249.821266\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"\n",
|
||||
"completion(model=model, messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "HXdj5SEe9iLK"
|
||||
},
|
||||
"source": [
|
||||
"## Call Command-Nightly"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "EaUq2xIx9fhr",
|
||||
"outputId": "55fe6f52-b58b-4729-948a-74dac4b431b2"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<ModelResponse chat.completion id=chatcmpl-dc0d8ead-071d-486c-a111-78975b38794b at 0x1389725e0> JSON: {\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" As an AI model I don't have access to real-time data, so I can't tell\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-dc0d8ead-071d-486c-a111-78975b38794b\",\n",
|
||||
" \"created\": 1695490235.936903,\n",
|
||||
" \"response_ms\": 1022.6759999999999,\n",
|
||||
" \"model\": \"command-nightly\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 6,\n",
|
||||
" \"completion_tokens\": 19,\n",
|
||||
" \"total_tokens\": 25\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"completion(model=\"command-nightly\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "1g9hSgsL9soJ"
|
||||
},
|
||||
"source": [
|
||||
"## Call Azure OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For azure openai calls ensure to add the `azure/` prefix to `model`. If your deployment-id is `chatgpt-test` set `model` = `azure/chatgpt-test`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "AvLjR-PF-lt0",
|
||||
"outputId": "deff2db3-b003-48cd-ea62-c03a68a4464a"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<OpenAIObject chat.completion id=chatcmpl-820kZyCwbNvZATiLkNmXmpxxzvTKO at 0x138b84ae0> JSON: {\n",
|
||||
" \"id\": \"chatcmpl-820kZyCwbNvZATiLkNmXmpxxzvTKO\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1695490231,\n",
|
||||
" \"model\": \"gpt-35-turbo\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"Sorry, as an AI language model, I don't have real-time information. Please check your preferred weather website or app for the latest weather updates of San Francisco.\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"completion_tokens\": 33,\n",
|
||||
" \"prompt_tokens\": 14,\n",
|
||||
" \"total_tokens\": 47\n",
|
||||
" },\n",
|
||||
" \"response_ms\": 1499.529\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"completion(model=\"azure/chatgpt-v-2\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
File diff suppressed because one or more lines are too long
@@ -1,195 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "5hwntUxTMxEk"
|
||||
},
|
||||
"source": [
|
||||
"# Langchain liteLLM Demo Notebook\n",
|
||||
"## Use `ChatLiteLLM()` to instantly support 50+ LLM models\n",
|
||||
"Langchain Docs: https://python.langchain.com/docs/integrations/chat/litellm\n",
|
||||
"\n",
|
||||
"Call all LLM models using the same I/O interface\n",
|
||||
"\n",
|
||||
"Example usage\n",
|
||||
"```python\n",
|
||||
"ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
|
||||
"ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
|
||||
"ChatLiteLLM(model=\"command-nightly\")\n",
|
||||
"ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "aPNAUsCvB6Sv"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "MOhRaVnhB-0J"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain.chat_models import ChatLiteLLM\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "TahkCtlmCD65",
|
||||
"outputId": "5ddda40f-f252-4830-a8d6-bd3fa68ae487"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='I am an AI model known as GPT-3, developed by OpenAI.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ['OPENAI_API_KEY'] = \"\"\n",
|
||||
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")\n",
|
||||
"messages = [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"what model are you\"\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"chat(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "uXNDyU4jChcs",
|
||||
"outputId": "bd74b4c6-f9fb-42dc-fdc3-9240d50503ba"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" I'm Claude, an AI assistant created by Anthropic.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ['ANTHROPIC_API_KEY'] = \"\"\n",
|
||||
"chat = ChatLiteLLM(model=\"claude-2\", temperature=0.3)\n",
|
||||
"messages = [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"what model are you\"\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"chat(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "czbDJRKcC7BV",
|
||||
"outputId": "892e147d-831e-4884-dc71-040f92c3fb8e"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" I'm an AI based based on LLaMA models (LLaMA: Open and Efficient Foundation Language Models, Touvron et al. 2023), my knowledge was built from a massive corpus of text, including books, articles, and websites, and I was trained using a variety of machine learning algorithms. My model architecture is based on the transformer architecture, which is particularly well-suited for natural language processing tasks. My team of developers and I are constantly working to improve and fine-tune my performance, and I am always happy to help with any questions you may have!\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ['REPLICATE_API_TOKEN'] = \"\"\n",
|
||||
"chat = ChatLiteLLM(model=\"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\")\n",
|
||||
"messages = [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"what model are you?\"\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"chat(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "tZxpq5PDDY9Y",
|
||||
"outputId": "7e86f4ed-ac7a-45e1-87d0-217da6cad666"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' I am an AI-based large language model, or Chatbot, built by the company Cohere. I am designed to have polite, helpful, inclusive conversations with users. I am always learning and improving, and I am constantly being updated with new information and improvements.\\n\\nI am currently in the development phase, and I am not yet available to the general public. However, I am currently being used by a select group of users for testing and feedback.\\n\\nI am a large language model, which means that I am trained on a massive amount of data and can understand and respond to a wide range of requests and questions. I am also designed to be flexible and adaptable, so I can be customized to suit the needs of different users and use cases.\\n\\nI am currently being used to develop a range of applications, including customer service chatbots, content generation tools, and language translation services. I am also being used to train other language models and to develop new ways of using large language models.\\n\\nI am constantly being updated with new information and improvements, so I am always learning and improving. I am also being used to develop new ways of using large language models, so I am always evolving and adapting to new use cases and requirements.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ['COHERE_API_KEY'] = \"\"\n",
|
||||
"chat = ChatLiteLLM(model=\"command-nightly\")\n",
|
||||
"messages = [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"what model are you?\"\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"chat(messages)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,289 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm # version 0.1.724 or higher "
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Call Ollama - llama2 with Streaming"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<generator object get_ollama_response_stream at 0x109096c10>\n",
|
||||
"{'role': 'assistant', 'content': ' I'}\n",
|
||||
"{'role': 'assistant', 'content': \"'\"}\n",
|
||||
"{'role': 'assistant', 'content': 'm'}\n",
|
||||
"{'role': 'assistant', 'content': ' L'}\n",
|
||||
"{'role': 'assistant', 'content': 'La'}\n",
|
||||
"{'role': 'assistant', 'content': 'MA'}\n",
|
||||
"{'role': 'assistant', 'content': ','}\n",
|
||||
"{'role': 'assistant', 'content': ' an'}\n",
|
||||
"{'role': 'assistant', 'content': ' A'}\n",
|
||||
"{'role': 'assistant', 'content': 'I'}\n",
|
||||
"{'role': 'assistant', 'content': ' assistant'}\n",
|
||||
"{'role': 'assistant', 'content': ' developed'}\n",
|
||||
"{'role': 'assistant', 'content': ' by'}\n",
|
||||
"{'role': 'assistant', 'content': ' Meta'}\n",
|
||||
"{'role': 'assistant', 'content': ' A'}\n",
|
||||
"{'role': 'assistant', 'content': 'I'}\n",
|
||||
"{'role': 'assistant', 'content': ' that'}\n",
|
||||
"{'role': 'assistant', 'content': ' can'}\n",
|
||||
"{'role': 'assistant', 'content': ' understand'}\n",
|
||||
"{'role': 'assistant', 'content': ' and'}\n",
|
||||
"{'role': 'assistant', 'content': ' respond'}\n",
|
||||
"{'role': 'assistant', 'content': ' to'}\n",
|
||||
"{'role': 'assistant', 'content': ' human'}\n",
|
||||
"{'role': 'assistant', 'content': ' input'}\n",
|
||||
"{'role': 'assistant', 'content': ' in'}\n",
|
||||
"{'role': 'assistant', 'content': ' a'}\n",
|
||||
"{'role': 'assistant', 'content': ' convers'}\n",
|
||||
"{'role': 'assistant', 'content': 'ational'}\n",
|
||||
"{'role': 'assistant', 'content': ' manner'}\n",
|
||||
"{'role': 'assistant', 'content': '.'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"ollama/llama2\", \n",
|
||||
" messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n",
|
||||
" api_base=\"http://localhost:11434\",\n",
|
||||
" stream=True\n",
|
||||
")\n",
|
||||
"print(response)\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk['choices'][0]['delta'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Call Ollama - Llama2 with Acompletion + Streaming"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||||
"Requirement already satisfied: async_generator in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (1.10)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# litellm uses async_generator for ollama async streaming, ensure it's installed\n",
|
||||
"!pip install async_generator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': 'm'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' just'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' an'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' A'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': 'I'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' don'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': \"'\"}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': 't'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' have'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' access'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' real'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': '-'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': 'time'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' or'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' current'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' conditions'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' in'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' specific'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' живело'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' can'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' provide'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' with'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' weather'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' forec'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': 'asts'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' information'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' for'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' your'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' location'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' if'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' would'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' like'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' Please'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' let'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' me'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' know'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' where'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' are'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' located'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ','}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' and'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' I'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' will'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' do'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' my'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' best'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' to'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' assist'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': ' you'}}]}\n",
|
||||
"{'choices': [{'delta': {'role': 'assistant', 'content': '.'}}]}\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import litellm\n",
|
||||
"\n",
|
||||
"async def async_ollama():\n",
|
||||
" response = await litellm.acompletion(\n",
|
||||
" model=\"ollama/llama2\", \n",
|
||||
" messages=[{ \"content\": \"what's the weather\" ,\"role\": \"user\"}], \n",
|
||||
" api_base=\"http://localhost:11434\", \n",
|
||||
" stream=True\n",
|
||||
" )\n",
|
||||
" async for chunk in response:\n",
|
||||
" print(chunk)\n",
|
||||
"\n",
|
||||
"result = await async_ollama()\n",
|
||||
"print(result)\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" async for chunk in result:\n",
|
||||
" print(chunk)\n",
|
||||
"except TypeError: # the last chunk is None from Ollama, this raises an error with async streaming\n",
|
||||
" pass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Completion Call"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"finish_reason\": \"stop\",\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"content\": \" I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.\",\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"logprobs\": null\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"id\": \"chatcmpl-ea7b8242-791f-4656-ba12-e098edeb960e\",\n",
|
||||
" \"created\": 1695324686.6696231,\n",
|
||||
" \"response_ms\": 4072.3050000000003,\n",
|
||||
" \"model\": \"ollama/llama2\",\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 10,\n",
|
||||
" \"completion_tokens\": 27,\n",
|
||||
" \"total_tokens\": 37\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"response = completion(\n",
|
||||
" model=\"ollama/llama2\", \n",
|
||||
" messages=[{ \"content\": \"respond in 20 words. who are you?\",\"role\": \"user\"}], \n",
|
||||
" api_base=\"http://localhost:11434\"\n",
|
||||
")\n",
|
||||
"print(response)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
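
Since the streaming cells above only print each delta as it arrives, here is a minimal sketch of accumulating the streamed chunks into a single reply string, assuming the same local Ollama server at `http://localhost:11434` with `llama2` pulled; this pattern is an illustration, not part of the original notebook.

```python
# Sketch: collect the streamed Ollama deltas into one string instead of
# printing them chunk by chunk. Assumes a local Ollama server is running.
from litellm import completion

response = completion(
    model="ollama/llama2",
    messages=[{"content": "respond in 20 words. who are you?", "role": "user"}],
    api_base="http://localhost:11434",
    stream=True,
)

full_text = ""
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    # Depending on the litellm version, the delta is a dict (as shown in the
    # output above) or an object with a .content attribute; handle both.
    piece = delta.get("content") if isinstance(delta, dict) else getattr(delta, "content", None)
    if piece:
        full_text += piece

print(full_text)
```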
|
@@ -1,238 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "YV6L5fNv7Kep"
|
||||
},
|
||||
"source": [
|
||||
"# Call Replicate LLMs using chatGPT Input/Output Format\n",
|
||||
"This tutorial covers using the following Replicate Models with liteLLM\n",
|
||||
"\n",
|
||||
"- [StableLM Tuned Alpha 7B](https://replicate.com/stability-ai/stablelm-tuned-alpha-7b)\n",
|
||||
"- [LLAMA-2 70B Chat](https://replicate.com/replicate/llama-2-70b-chat)\n",
|
||||
"- [A16z infra-LLAMA-2 7B Chat](https://replicate.com/a16z-infra/llama-2-7b-chat)\n",
|
||||
"- [Dolly V2 12B](https://replicate.com/replicate/dolly-v2-12b)\n",
|
||||
"- [Vicuna 13B](https://replicate.com/replicate/vicuna-13b)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "TO-EdF84O9QT"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install liteLLM\n",
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "mpHTbTqQ8fey"
|
||||
},
|
||||
"source": [
|
||||
"Imports & Set ENV variables\n",
|
||||
"Get your Replicate Key: https://replicate.com/account/api-tokens"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "kDbgfcU8O-dW"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"import os\n",
|
||||
"os.environ['REPLICATE_API_TOKEN'] = ' ' # @param\n",
|
||||
"user_message = \"Hello, whats the weather in San Francisco??\"\n",
|
||||
"messages = [{ \"content\": user_message,\"role\": \"user\"}]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "1KmkOdzLSOmJ"
|
||||
},
|
||||
"source": [
|
||||
"## Call Replicate Models using completion(model, messages) - chatGPT format"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "XJ4nh4SnRzHP",
|
||||
"outputId": "986c0544-bb40-4915-f00f-498b0e518307"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"replicate is not installed. Installing...\n",
|
||||
"Response from stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \"I'm sorry for you being unable to access this content as my training data only goes up until 2023/03. However I can tell you what your local weather forecast may look like at any time of year with respect to current conditions:\"}}], 'created': 1691611730.7224207, 'model': 'stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb', 'usage': {'prompt_tokens': 9, 'completion_tokens': 49, 'total_tokens': 58}}\n",
|
||||
"Response from replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1 \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" Hello! I'm happy to help you with your question. However, I must point out that the question itself may not be meaningful. San Francisco is a city located in California, USA, and it is not possible for me to provide you with the current weather conditions there as I am a text-based AI language model and do not have access to real-time weather data. Additionally, the weather in San Francisco can vary greatly depending on the time of year, so it would be best to check a reliable weather source for the most up-to-date information.\\n\\nIf you meant to ask a different question, please feel free to rephrase it, and I will do my best to assist you in a safe and positive manner.\"}}], 'created': 1691611745.0269957, 'model': 'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1', 'usage': {'prompt_tokens': 9, 'completion_tokens': 143, 'total_tokens': 152}}\n",
|
||||
"Response from a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': \" Hello! I'm here to help you with your question. However, I must inform you that the weather in San Francisco can be quite unpredictable and can change rapidly. It's important to check reliable sources such as AccuWeather or the National Weather Service for the most up-to-date and accurate information about the weather in San Francisco.\\nI cannot provide you with real-time weather data or forecasts as I'm just an AI and do not have access to current weather conditions or predictions. But I can suggest some trustworthy websites or apps where you can find the latest weather updates:\\n* AccuWeather (accuweather.com)\\n* The Weather Channel (weather.com)\\n* Dark Sky (darksky.net)\\n* Weather Underground (wunderground.com)\\nRemember, it's always best to consult multiple sources for the most accurate information when planning your day or trip. Enjoy your day!\"}}], 'created': 1691611748.7723358, 'model': 'a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc', 'usage': {'prompt_tokens': 9, 'completion_tokens': 174, 'total_tokens': 183}}\n",
|
||||
"Response from replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5 \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': 'Its 68 degrees right now in San Francisco! The temperature will be rising through the week and i expect it to reach 70 on Thursdays and Friday. Skies are expected to be partly cloudy with some sun breaks throughout the day.\\n\\n'}}], 'created': 1691611752.2002115, 'model': 'replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5', 'usage': {'prompt_tokens': 9, 'completion_tokens': 48, 'total_tokens': 57}}\n",
|
||||
"Response from replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': ''}}], 'created': 1691611752.8998356, 'model': 'replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b', 'usage': {'prompt_tokens': 9, 'completion_tokens': 0, 'total_tokens': 9}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llama_2 = \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"\n",
|
||||
"llama_2_7b = \"a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\"\n",
|
||||
"dolly_v2 = \"replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5\"\n",
|
||||
"vicuna = \"replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\"\n",
|
||||
"models = [llama_2, llama_2_7b, dolly_v2, vicuna]\n",
|
||||
"for model in models:\n",
|
||||
" response = completion(model=model, messages=messages)\n",
|
||||
" print(f\"Response from {model} \\n]\\n\")\n",
|
||||
" print(response)"
|
||||
]
|
||||
},
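The loop above calls several Replicate models in sequence, and as the recorded output shows, one of them (vicuna) came back empty. A hedged sketch with basic error handling — assuming the same `models` list and `messages` defined above — keeps one failing or empty model from stopping the whole run.

```python
# A sketch (assumes the `models` list and `messages` defined in the cells above).
# Wrap each call so one failing provider or empty reply doesn't abort the loop.
from litellm import completion

results = {}
for model in models:
    try:
        response = completion(model=model, messages=messages)
        content = response["choices"][0]["message"]["content"]
        results[model] = content if content else "<empty response>"
    except Exception as err:  # keep going if a single provider call fails
        results[model] = f"<error: {err}>"

for model, content in results.items():
    print(f"{model}: {content[:80]}")
```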
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "zlTVLB-7PTV_",
|
||||
"outputId": "5182275b-3108-46fa-a2cf-745fac4ad110"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hi\n",
|
||||
" there!\n",
|
||||
" The\n",
|
||||
" current\n",
|
||||
" forecast\n",
|
||||
" for\n",
|
||||
" today's\n",
|
||||
" high\n",
|
||||
" temperature\n",
|
||||
" ranges\n",
|
||||
" from\n",
|
||||
" 75\n",
|
||||
" degrees\n",
|
||||
" Fahrenheit\n",
|
||||
" all\n",
|
||||
" day\n",
|
||||
" to\n",
|
||||
" 83\n",
|
||||
" degrees\n",
|
||||
" Fahrenheit\n",
|
||||
" with\n",
|
||||
" possible\n",
|
||||
" isolated\n",
|
||||
" thunderstorms\n",
|
||||
" during\n",
|
||||
" the\n",
|
||||
" afternoon\n",
|
||||
" hours,\n",
|
||||
" mainly\n",
|
||||
" at\n",
|
||||
" sunset\n",
|
||||
" through\n",
|
||||
" early\n",
|
||||
" evening. The\n",
|
||||
" Pacific\n",
|
||||
" Ocean\n",
|
||||
" has\n",
|
||||
" a\n",
|
||||
" low\n",
|
||||
" pressure\n",
|
||||
" of\n",
|
||||
" 926\n",
|
||||
" mb\n",
|
||||
" and\n",
|
||||
" mostly\n",
|
||||
" cloud\n",
|
||||
" cover\n",
|
||||
" in\n",
|
||||
" this\n",
|
||||
" region\n",
|
||||
" on\n",
|
||||
" sunny\n",
|
||||
" days\n",
|
||||
" due\n",
|
||||
" to\n",
|
||||
" warming\n",
|
||||
" temperatures\n",
|
||||
" above\n",
|
||||
" average\n",
|
||||
" along\n",
|
||||
" most\n",
|
||||
" coastal\n",
|
||||
" areas\n",
|
||||
" and\n",
|
||||
" ocean\n",
|
||||
" breezes.<|USER|>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# @title Stream Responses from Replicate - Outputs in the same format used by chatGPT streaming\n",
|
||||
"response = completion(model=llama_2, messages=messages, stream=True)\n",
|
||||
"\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk['choices'][0]['delta'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "t7WMRuL-8NrO"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@@ -1,226 +0,0 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# [STREAMING] OpenAI, Anthropic, Replicate, Cohere using liteLLM\n",
|
||||
"In this tutorial:\n",
|
||||
"Note: All inputs/outputs are in the format used by `gpt-3.5-turbo`\n",
|
||||
"\n",
|
||||
"- Call all models in the same input format [**with streaming**]:\n",
|
||||
"\n",
|
||||
" `completion(model, messages, stream=True)`\n",
|
||||
"- All streaming generators are accessed at `chunk['choices'][0]['delta']`\n",
|
||||
"\n",
|
||||
"The following Models are covered in this tutorial\n",
|
||||
"- [GPT-3.5-Turbo](https://platform.openai.com/docs/models/gpt-3-5)\n",
|
||||
"- [Claude-2](https://www.anthropic.com/index/claude-2)\n",
|
||||
"- [StableLM Tuned Alpha 7B](https://replicate.com/stability-ai/stablelm-tuned-alpha-7b)\n",
|
||||
"- [A16z infra-LLAMA-2 7B Chat](https://replicate.com/a16z-infra/llama-2-7b-chat)\n",
|
||||
"- [Vicuna 13B](https://replicate.com/replicate/vicuna-13b)\n",
|
||||
"- [Cohere - Command Nightly]()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "YV6L5fNv7Kep"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "TO-EdF84O9QT"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install liteLLM\n",
|
||||
"!pip install litellm==0.1.369"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Imports & Set ENV variables\n",
|
||||
"Get your API Keys\n",
|
||||
"\n",
|
||||
"https://platform.openai.com/account/api-keys\n",
|
||||
"\n",
|
||||
"https://replicate.com/account/api-tokens\n",
|
||||
"\n",
|
||||
"https://console.anthropic.com/account/keys\n",
|
||||
"\n",
|
||||
"https://dashboard.cohere.ai/api-keys\n"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "mpHTbTqQ8fey"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = '' # @param\n",
|
||||
"os.environ['REPLICATE_API_TOKEN'] = '' # @param\n",
|
||||
"os.environ['ANTHROPIC_API_KEY'] = '' # @param\n",
|
||||
"os.environ['COHERE_API_KEY'] = '' # @param"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "kDbgfcU8O-dW"
|
||||
},
|
||||
"execution_count": 8,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Set Messages"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "1KmkOdzLSOmJ"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"user_message = \"Hello, whats the weather in San Francisco??\"\n",
|
||||
"messages = [{ \"content\": user_message,\"role\": \"user\"}]"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "xIEeOhVH-oh6"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Calling Models using liteLLM Streaming -\n",
|
||||
"\n",
|
||||
"## `completion(model, messages, stream)`"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "9SOCVRC1L-G3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# replicate models #######\n",
|
||||
"stability_ai = \"stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\"\n",
|
||||
"llama_2_7b = \"a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\"\n",
|
||||
"vicuna = \"replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\"\n",
|
||||
"\n",
|
||||
"models = [\"gpt-3.5-turbo\", \"claude-2\", stability_ai, llama_2_7b, vicuna, \"command-nightly\"] # command-nightly is Cohere\n",
|
||||
"for model in models:\n",
|
||||
" replicate = (model == stability_ai or model==llama_2_7b or model==vicuna) # let liteLLM know if a model is replicate, using this optional param, `replicate=True`\n",
|
||||
" response = completion(model=model, messages=messages, stream=True, replicate=replicate)\n",
|
||||
" print(f\"####################\\n\\nResponse from {model}\")\n",
|
||||
" for i, chunk in enumerate(response):\n",
|
||||
" if i < 5: # NOTE: LIMITING CHUNKS FOR THIS DEMO\n",
|
||||
" print((chunk['choices'][0]['delta']))\n"
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "XJ4nh4SnRzHP",
|
||||
"outputId": "26b9fe10-b499-4a97-d60d-a8cb8f8030b8"
|
||||
},
|
||||
"execution_count": 13,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"####################\n",
|
||||
"\n",
|
||||
"Response from gpt-3.5-turbo\n",
|
||||
"{\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"\"\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"content\": \"I\"\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"content\": \"'m\"\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"content\": \" sorry\"\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"content\": \",\"\n",
|
||||
"}\n",
|
||||
"####################\n",
|
||||
"\n",
|
||||
"Response from claude-2\n",
|
||||
"{'role': 'assistant', 'content': ' Unfortunately'}\n",
|
||||
"{'role': 'assistant', 'content': ' I'}\n",
|
||||
"{'role': 'assistant', 'content': ' don'}\n",
|
||||
"{'role': 'assistant', 'content': \"'t\"}\n",
|
||||
"{'role': 'assistant', 'content': ' have'}\n",
|
||||
"####################\n",
|
||||
"\n",
|
||||
"Response from stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\n",
|
||||
"{'role': 'assistant', 'content': \"I'm\"}\n",
|
||||
"{'role': 'assistant', 'content': ' sorry,'}\n",
|
||||
"{'role': 'assistant', 'content': ' I'}\n",
|
||||
"{'role': 'assistant', 'content': ' cannot'}\n",
|
||||
"{'role': 'assistant', 'content': ' answer'}\n",
|
||||
"####################\n",
|
||||
"\n",
|
||||
"Response from a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\n",
|
||||
"{'role': 'assistant', 'content': ''}\n",
|
||||
"{'role': 'assistant', 'content': ' Hello'}\n",
|
||||
"{'role': 'assistant', 'content': '!'}\n",
|
||||
"{'role': 'assistant', 'content': ' I'}\n",
|
||||
"{'role': 'assistant', 'content': \"'\"}\n",
|
||||
"####################\n",
|
||||
"\n",
|
||||
"Response from replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\n",
|
||||
"{'role': 'assistant', 'content': 'Comment:'}\n",
|
||||
"{'role': 'assistant', 'content': 'Hi! '}\n",
|
||||
"{'role': 'assistant', 'content': 'How '}\n",
|
||||
"{'role': 'assistant', 'content': 'are '}\n",
|
||||
"{'role': 'assistant', 'content': 'you '}\n",
|
||||
"####################\n",
|
||||
"\n",
|
||||
"Response from command-nightly\n",
|
||||
"{'role': 'assistant', 'content': ' Hello'}\n",
|
||||
"{'role': 'assistant', 'content': '!'}\n",
|
||||
"{'role': 'assistant', 'content': ' '}\n",
|
||||
"{'role': 'assistant', 'content': ' I'}\n",
|
||||
"{'role': 'assistant', 'content': \"'m\"}\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
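Each chunk printed above carries only a small delta. To rebuild the full reply, the deltas can be concatenated; the sketch below is not part of the original notebook and assumes the `messages` list defined earlier plus the chunk shape shown in the output (a `delta` whose `content` may be absent on the final chunk).

```python
# A sketch of re-assembling a streamed reply into one string.
# Assumes `messages` from the earlier cell and the chunk shape shown above.
from litellm import completion

response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)

full_reply = ""
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    content = delta.get("content") if isinstance(delta, dict) else getattr(delta, "content", None)
    if content:
        full_reply += content

print(full_reply)
```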
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"id": "t7WMRuL-8NrO"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
@@ -1,199 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using Google Palm (VertexAI) with liteLLM \n",
|
||||
"### chat-bison, chat-bison@001, text-bison, text-bison@001"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install litellm==0.1.388"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set VertexAI Configs\n",
|
||||
"Vertex AI requires the following:\n",
|
||||
"* `vertex_project` - Your Project ID\n",
|
||||
"* `vertex_location` - Your Vertex AI region\n",
|
||||
"Both can be found on: https://console.cloud.google.com/\n",
|
||||
"\n",
|
||||
"VertexAI uses Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information on setting this up\n",
|
||||
"\n",
|
||||
"NOTE: VertexAI requires you to set `application_default_credentials.json`, this can be set by running `gcloud auth application-default login` in your terminal\n",
|
||||
"\n"
|
||||
]
|
||||
},
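Before pointing liteLLM at Vertex AI it can help to confirm that Application Default Credentials actually resolve. The check below is an optional addition, assuming the `google-auth` package is installed (it is pulled in by `google-cloud-aiplatform`).

```python
# Optional sanity check: confirm Application Default Credentials resolve before calling Vertex AI.
# Assumes the google-auth package is installed (a dependency of google-cloud-aiplatform).
import google.auth
from google.auth.exceptions import DefaultCredentialsError

try:
    credentials, detected_project = google.auth.default()
    print(f"ADC found; default project: {detected_project}")
except DefaultCredentialsError:
    print("No ADC found - run `gcloud auth application-default login` first")
```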
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set you Vertex AI configs\n",
|
||||
"import litellm\n",
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"litellm.vertex_project = \"hardy-device-386718\"\n",
|
||||
"litellm.vertex_location = \"us-central1\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Call VertexAI - chat-bison using liteLLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': LiteLLM LiteLLM is a large language model from Google AI that is designed to be lightweight and efficient. It is based on the Transformer architecture and has been trained on a massive dataset of text. LiteLLM is available as a pre-trained model that can be used for a variety of natural language processing tasks, such as text classification, question answering, and summarization.}}], 'created': 1692036777.831989, 'model': 'chat-bison'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"user_message = \"what is liteLLM \"\n",
|
||||
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
|
||||
"\n",
|
||||
"# chat-bison or chat-bison@001 supported by Vertex AI (As of Aug 2023)\n",
|
||||
"response = completion(model=\"chat-bison\", messages=messages)\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Call VertexAI - text-bison using liteLLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['text-bison', 'text-bison@001']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(litellm.vertex_text_models)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': liteLLM is a low-precision variant of the large language model LLM 5. For a given text prompt, liteLLM can continue the text in a way that is both coherent and informative.}}], 'created': 1692036813.052487, 'model': 'text-bison@001'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"user_message = \"what is liteLLM \"\n",
|
||||
"messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
|
||||
"\n",
|
||||
"# text-bison or text-bison@001 supported by Vertex AI (As of Aug 2023)\n",
|
||||
"response = completion(model=\"text-bison@001\", messages=messages)\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': liteLLM was originally developed by Google engineers as a lite version of LLM, which stands for large language model. It is a deep learning language model that is designed to be more efficient than traditional LLMs while still achieving comparable performance. liteLLM is built on Tensor2Tensor, a framework for building and training large neural networks. It is able to learn from massive amounts of text data and generate text that is both coherent and informative. liteLLM has been shown to be effective for a variety of tasks, including machine translation, text summarization, and question answering.}}], 'created': 1692036821.60951, 'model': 'text-bison'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = completion(model=\"text-bison\", messages=messages)\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"liteLLM is a lightweight language model that is designed to be fast and efficient. It is based on the Transformer architecture, but it has been modified to reduce the number of parameters and the amount of computation required. This makes it suitable for use on devices with limited resources, such as mobile phones and embedded systems.\n",
|
||||
"\n",
|
||||
"liteLLM is still under development, but it has already been shown to be effective on a variety of tasks, including text classification, natural language inference, and machine translation. It is also being used to develop new applications, such as chatbots and language assistants.\n",
|
||||
"\n",
|
||||
"If you are interested in learning more about lite\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = completion(model=\"text-bison@001\", messages=messages, temperature=0.4, top_k=10, top_p=0.2)\n",
|
||||
"print(response['choices'][0]['message']['content'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@@ -1,187 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LiteLLM Clarifai \n",
|
||||
"This notebook walks you through on how to use liteLLM integration of Clarifai and call LLM model from clarifai with response in openAI output format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pre-Requisites"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#install necessary packages\n",
|
||||
"!pip install litellm\n",
|
||||
"!pip install clarifai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To obtain Clarifai Personal Access Token follow the steps mentioned in the [link](https://docs.clarifai.com/clarifai-basics/authentication/personal-access-tokens/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Set Clarifai Credentials\n",
|
||||
"import os\n",
|
||||
"os.environ[\"CLARIFAI_API_KEY\"]= \"YOUR_CLARIFAI_PAT\" # Clarifai PAT"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Mistral-large"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import litellm\n",
|
||||
"\n",
|
||||
"litellm.set_verbose=False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Mistral large response : ModelResponse(id='chatcmpl-6eed494d-7ae2-4870-b9c2-6a64d50a6151', choices=[Choices(finish_reason='stop', index=1, message=Message(content=\"In the grand tapestry of time, where tales unfold,\\nLies the chronicle of ages, a sight to behold.\\nA tale of empires rising, and kings of old,\\nOf civilizations lost, and stories untold.\\n\\nOnce upon a yesterday, in a time so vast,\\nHumans took their first steps, casting shadows in the past.\\nFrom the cradle of mankind, a journey they embarked,\\nThrough stone and bronze and iron, their skills they sharpened and marked.\\n\\nEgyptians built pyramids, reaching for the skies,\\nWhile Greeks sought wisdom, truth, in philosophies that lie.\\nRoman legions marched, their empire to expand,\\nAnd in the East, the Silk Road joined the world, hand in hand.\\n\\nThe Middle Ages came, with knights in shining armor,\\nFeudal lords and serfs, a time of both clamor and calm order.\\nThen Renaissance bloomed, like a flower in the sun,\\nA rebirth of art and science, a new age had begun.\\n\\nAcross the vast oceans, explorers sailed with courage bold,\\nDiscovering new lands, stories of adventure, untold.\\nIndustrial Revolution churned, progress in its wake,\\nMachines and factories, a whole new world to make.\\n\\nTwo World Wars raged, a testament to man's strife,\\nYet from the ashes rose hope, a renewed will for life.\\nInto the modern era, technology took flight,\\nConnecting every corner, bathed in digital light.\\n\\nHistory, a symphony, a melody of time,\\nA testament to human will, resilience so sublime.\\nIn every page, a lesson, in every tale, a guide,\\nFor understanding our past, shapes our future's tide.\", role='assistant'))], created=1713896412, model='https://api.clarifai.com/v2/users/mistralai/apps/completion/models/mistral-large/outputs', object='chat.completion', system_fingerprint=None, usage=Usage(prompt_tokens=13, completion_tokens=338, total_tokens=351))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
|
||||
"response=completion(\n",
|
||||
" model=\"clarifai/mistralai.completion.mistral-large\",\n",
|
||||
" messages=messages,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print(f\"Mistral large response : {response}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Claude-2.1 "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Claude-2.1 response : ModelResponse(id='chatcmpl-d126c919-4db4-4aa3-ac8f-7edea41e0b93', choices=[Choices(finish_reason='stop', index=1, message=Message(content=\" Here's a poem I wrote about history:\\n\\nThe Tides of Time\\n\\nThe tides of time ebb and flow,\\nCarrying stories of long ago.\\nFigures and events come into light,\\nShaping the future with all their might.\\n\\nKingdoms rise, empires fall, \\nLeaving traces that echo down every hall.\\nRevolutions bring change with a fiery glow,\\nToppling structures from long ago.\\n\\nExplorers traverse each ocean and land,\\nSeeking treasures they don't understand.\\nWhile artists and writers try to make their mark,\\nHoping their works shine bright in the dark.\\n\\nThe cycle repeats again and again,\\nAs humanity struggles to learn from its pain.\\nThough the players may change on history's stage,\\nThe themes stay the same from age to age.\\n\\nWar and peace, life and death,\\nLove and strife with every breath.\\nThe tides of time continue their dance,\\nAs we join in, by luck or by chance.\\n\\nSo we study the past to light the way forward, \\nHeeding warnings from stories told and heard.\\nThe future unfolds from this unending flow -\\nWhere the tides of time ultimately go.\", role='assistant'))], created=1713896579, model='https://api.clarifai.com/v2/users/anthropic/apps/completion/models/claude-2_1/outputs', object='chat.completion', system_fingerprint=None, usage=Usage(prompt_tokens=12, completion_tokens=232, total_tokens=244))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
|
||||
"response=completion(\n",
|
||||
" model=\"clarifai/anthropic.completion.claude-2_1\",\n",
|
||||
" messages=messages,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print(f\"Claude-2.1 response : {response}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### OpenAI GPT-4 (Streaming)\n",
|
||||
"Though clarifai doesn't support streaming, still you can call stream and get the response in standard StreamResponse format of liteLLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ModelResponse(id='chatcmpl-40ae19af-3bf0-4eb4-99f2-33aec3ba84af', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"In the quiet corners of time's grand hall,\\nLies the tale of rise and fall.\\nFrom ancient ruins to modern sprawl,\\nHistory, the greatest story of them all.\\n\\nEmpires have risen, empires have decayed,\\nThrough the eons, memories have stayed.\\nIn the book of time, history is laid,\\nA tapestry of events, meticulously displayed.\\n\\nThe pyramids of Egypt, standing tall,\\nThe Roman Empire's mighty sprawl.\\nFrom Alexander's conquest, to the Berlin Wall,\\nHistory, a silent witness to it all.\\n\\nIn the shadow of the past we tread,\\nWhere once kings and prophets led.\\nTheir stories in our hearts are spread,\\nEchoes of their words, in our minds are read.\\n\\nBattles fought and victories won,\\nActs of courage under the sun.\\nTales of love, of deeds done,\\nIn history's grand book, they all run.\\n\\nHeroes born, legends made,\\nIn the annals of time, they'll never fade.\\nTheir triumphs and failures all displayed,\\nIn the eternal march of history's parade.\\n\\nThe ink of the past is forever dry,\\nBut its lessons, we cannot deny.\\nIn its stories, truths lie,\\nIn its wisdom, we rely.\\n\\nHistory, a mirror to our past,\\nA guide for the future vast.\\nThrough its lens, we're ever cast,\\nIn the drama of life, forever vast.\", role='assistant', function_call=None, tool_calls=None), logprobs=None)], created=1714744515, model='https://api.clarifai.com/v2/users/openai/apps/chat-completion/models/GPT-4/outputs', object='chat.completion.chunk', system_fingerprint=None)\n",
|
||||
"ModelResponse(id='chatcmpl-40ae19af-3bf0-4eb4-99f2-33aec3ba84af', choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(content=None, role=None, function_call=None, tool_calls=None), logprobs=None)], created=1714744515, model='https://api.clarifai.com/v2/users/openai/apps/chat-completion/models/GPT-4/outputs', object='chat.completion.chunk', system_fingerprint=None)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from litellm import completion\n",
|
||||
"\n",
|
||||
"messages = [{\"role\": \"user\",\"content\": \"\"\"Write a poem about history?\"\"\"}]\n",
|
||||
"response = completion(\n",
|
||||
" model=\"clarifai/openai.chat-completion.GPT-4\",\n",
|
||||
" messages=messages,\n",
|
||||
" stream=True,\n",
|
||||
" api_key = \"c75cc032415e45368be331fdd2c06db0\")\n",
|
||||
"\n",
|
||||
"for chunk in response:\n",
|
||||
" print(chunk)"
|
||||
]
|
||||
},
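As the output above shows, the final streamed chunk arrives with `content=None`. A small sketch that collects only the non-empty deltas follows; it is not from the original notebook, re-creates the generator (the one above is already consumed), and relies on `CLARIFAI_API_KEY` being set as in the earlier cell.

```python
# A sketch for collecting the streamed text into one string. Re-creates the generator,
# since the one above was consumed, and relies on CLARIFAI_API_KEY being set earlier.
from litellm import completion

response = completion(
    model="clarifai/openai.chat-completion.GPT-4",
    messages=messages,
    stream=True,
)

pieces = []
for chunk in response:
    delta = chunk.choices[0].delta
    # the final chunk carries content=None, so skip empty deltas
    if getattr(delta, "content", None):
        pieces.append(delta.content)

print("".join(pieces))
```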
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@@ -1,331 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "vnvlwUDZK7VA"
|
||||
},
|
||||
"source": [
|
||||
"## Demo Notebook of Function Calling with liteLLM\n",
|
||||
"- Supported Providers for Function Calling\n",
|
||||
" - OpenAI - `gpt-4-0613` and `gpt-3.5-turbo-0613`\n",
|
||||
"- In this notebook we use function calling with `litellm.completion()`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "KrINCwRfLgZV"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Install liteLLM\n",
|
||||
"!pip install litellm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"id": "nK7zR5OgLlh2"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from litellm import completion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"id": "dCQlyBxKLqbA"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ['OPENAI_API_KEY'] = \"\" #@param"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "gfdGv-FMRCdX"
|
||||
},
|
||||
"source": [
|
||||
"## Define Messages, Functions\n",
|
||||
"We create a get_current_weather() function and pass that to GPT 3.5\n",
|
||||
"\n",
|
||||
"See OpenAI docs for this: https://openai.com/blog/function-calling-and-other-api-updates"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {
|
||||
"id": "ERzsP1sfM19C"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"}\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"def get_current_weather(location):\n",
|
||||
" if location == \"Boston, MA\":\n",
|
||||
" return \"The weather is 12F\"\n",
|
||||
"\n",
|
||||
"functions = [\n",
|
||||
" {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\"\n",
|
||||
" },\n",
|
||||
" \"unit\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"enum\": [\"celsius\", \"fahrenheit\"]\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"]\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "NX6by2VuRPnp"
|
||||
},
|
||||
"source": [
|
||||
"## Call gpt-3.5-turbo-0613 to Decide what Function to call"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "QVoJ5PtxMlVx",
|
||||
"outputId": "efe7a81f-e04a-4afc-aa60-a2b2648f5fb9"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"id\": \"chatcmpl-7mX4RiqdoislVEqfmfVjFSKp3hyIy\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1691801223,\n",
|
||||
" \"model\": \"gpt-3.5-turbo-0613\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": null,\n",
|
||||
" \"function_call\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"function_call\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 82,\n",
|
||||
" \"completion_tokens\": 18,\n",
|
||||
" \"total_tokens\": 100\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Yu0o2saDNLx8"
|
||||
},
|
||||
"source": [
|
||||
"## Parse GPT 3.5 Response\n",
|
||||
"Read Information about what Function to Call"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "u1DzXLJsNOR5",
|
||||
"outputId": "177e9501-0ce2-4619-9067-3047f18f6c79"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<OpenAIObject at 0x7922c70ce930> JSON: {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"arguments\": \"{\\n \\\"location\\\": \\\"Boston, MA\\\"\\n}\"\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"function_call_data = response[\"choices\"][0][\"message\"][\"function_call\"]\n",
|
||||
"function_call_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "tYb96Mh0NhH9",
|
||||
"outputId": "13c4bb89-6f29-4b3b-afa7-302dcf2cdd5f"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"get_current_weather {'location': 'Boston, MA'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"function_name = function_call_data['name']\n",
|
||||
"function_args = function_call_data['arguments']\n",
|
||||
"function_args = json.loads(function_args)\n",
|
||||
"print(function_name, function_args)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "z3tstH_yN3fX"
|
||||
},
|
||||
"source": [
|
||||
"## Call the get_current_weather() function"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "TSb8JHhgN5Zc",
|
||||
"outputId": "ef140572-4020-4daf-ac8c-d5161be9aa5c"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"12F\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"if function_name == \"get_current_weather\":\n",
|
||||
" result = get_current_weather(**function_args)\n",
|
||||
" print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "k4HGJE3NRmMI"
|
||||
},
|
||||
"source": [
|
||||
"## Send the response from get_current_weather back to the model to summarize"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "a23cmEwiPaw7",
|
||||
"outputId": "43259b86-0c4c-4fcb-eab7-6e1a788b2f21"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"id\": \"chatcmpl-7mXGN62u75WXp1Lgen4iSgNvA7hHT\",\n",
|
||||
" \"object\": \"chat.completion\",\n",
|
||||
" \"created\": 1691801963,\n",
|
||||
" \"model\": \"gpt-3.5-turbo-0613\",\n",
|
||||
" \"choices\": [\n",
|
||||
" {\n",
|
||||
" \"index\": 0,\n",
|
||||
" \"message\": {\n",
|
||||
" \"role\": \"assistant\",\n",
|
||||
" \"content\": \"The current weather in Boston is 12 degrees Fahrenheit.\"\n",
|
||||
" },\n",
|
||||
" \"finish_reason\": \"stop\"\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"usage\": {\n",
|
||||
" \"prompt_tokens\": 109,\n",
|
||||
" \"completion_tokens\": 12,\n",
|
||||
" \"total_tokens\": 121\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" {\"role\": \"user\", \"content\": \"What is the weather like in Boston?\"},\n",
|
||||
" {\"role\": \"assistant\", \"content\": None, \"function_call\": {\"name\": \"get_current_weather\", \"arguments\": \"{ \\\"location\\\": \\\"Boston, MA\\\"}\"}},\n",
|
||||
" {\"role\": \"function\", \"name\": \"get_current_weather\", \"content\": result}\n",
|
||||
"]\n",
|
||||
"response = completion(model=\"gpt-3.5-turbo-0613\", messages=messages, functions=functions)\n",
|
||||
"print(response)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
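The notebook above splits the function-calling round trip across several cells. A condensed end-to-end sketch is given below; it assumes `OPENAI_API_KEY` is set and that `functions` and `get_current_weather` are defined as in the earlier cells.

```python
# An end-to-end sketch of the round trip walked through above. Assumes OPENAI_API_KEY
# is set and that `functions` / `get_current_weather` exist as defined in the notebook.
import json
from litellm import completion

messages = [{"role": "user", "content": "What is the weather like in Boston?"}]

# 1) Ask the model; it should answer with a function_call instead of content
first = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
fc = first["choices"][0]["message"]["function_call"]

# 2) Run the local function with the model-supplied arguments
args = json.loads(fc["arguments"])
result = get_current_weather(**args)

# 3) Send the function result back so the model can phrase the final answer
messages += [
    {"role": "assistant", "content": None,
     "function_call": {"name": fc["name"], "arguments": fc["arguments"]}},
    {"role": "function", "name": fc["name"], "content": result},
]
final = completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions)
print(final["choices"][0]["message"]["content"])
```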
|
@@ -1,25 +0,0 @@
|
||||
FROM ollama/ollama as ollama
|
||||
|
||||
RUN echo "auto installing llama2"
|
||||
|
||||
# auto install ollama/llama2
|
||||
RUN ollama serve & sleep 2 && ollama pull llama2
|
||||
|
||||
RUN echo "installing litellm"
|
||||
|
||||
RUN apt-get update
|
||||
|
||||
# Install Python
|
||||
RUN apt-get install -y python3 python3-pip
|
||||
|
||||
# Set the working directory in the container
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the current directory contents into the container at /app
|
||||
COPY . /app
|
||||
|
||||
# Install litellm from PyPI
|
||||
|
||||
RUN python3 -m pip install litellm
|
||||
COPY start.sh /start.sh
|
||||
ENTRYPOINT [ "/bin/bash", "/start.sh" ]
|
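The Dockerfile above bakes Ollama, Python, and litellm into one image and hands control to `start.sh`. Calling the containerized model from the host might look like the sketch below; the published port is an assumption (e.g. `docker run -p 11434:11434 ...`), not something the Dockerfile itself configures.

```python
# A sketch of calling the containerized Ollama server from the host.
# Assumes the container was started with the Ollama port published, e.g.
#   docker run -p 11434:11434 <image>
# The port mapping and model name are assumptions, not taken from the Dockerfile.
from litellm import completion

response = completion(
    model="ollama/llama2",
    messages=[{"role": "user", "content": "Say hello from inside the container."}],
    api_base="http://localhost:11434",
)
print(response["choices"][0]["message"]["content"])
```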
@@ -1 +0,0 @@
|
||||
litellm==1.61.15
|
Some files were not shown because too many files have changed in this diff