Added LiteLLM to the stack

2025-08-18 09:40:50 +00:00
parent 0648c1968c
commit d220b04e32
2682 changed files with 533609 additions and 1 deletions
--- a/Development/litellm/cookbook/LiteLLM_HuggingFace.ipynb
+++ b/Development/litellm/cookbook/LiteLLM_HuggingFace.ipynb
@@ -0,0 +1,252 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9dKM5k8qsMIj"
+      },
+      "source": [
+        "## LiteLLM Hugging Face\n",
+        "\n",
+        "Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "BVDdmCp-o97j"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install litellm"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yp5UXRqtpu9f"
+      },
+      "source": [
+        "## Serverless Inference Providers\n",
+        "\n",
+        "Read more about Inference Providers here: https://huggingface.co/blog/inference-providers.\n",
+        "\n",
+        "In order to use litellm with Hugging Face Inference Providers, you need to set `model=huggingface/<provider>/<model-id>`.\n",
+        "\n",
+        "Example: `huggingface/together/deepseek-ai/DeepSeek-R1` to run DeepSeek-R1 (https://huggingface.co/deepseek-ai/DeepSeek-R1) through Together AI.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Pi5Oww8gpCUm",
+        "outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from litellm import completion\n",
+        "\n",
+        "# You can create a HF token here: https://huggingface.co/settings/tokens\n",
+        "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
+        "\n",
+        "# Call DeepSeek-R1 model through Together AI\n",
+        "response = completion(\n",
+        "    model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n",
+        "    messages=[{\"content\": \"How many r's are in the word `strawberry`?\", \"role\": \"user\"}],\n",
+        ")\n",
+        "print(response)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "EU0UubrKzTFe"
+      },
+      "source": [
+        "## Streaming\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "y-QfIvA-uJKX",
+        "outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from litellm import completion\n",
+        "\n",
+        "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
+        "\n",
+        "response = completion(\n",
+        "    model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n",
+        "    messages=[\n",
+        "        {\n",
+        "            \"role\": \"user\",\n",
+        "            \"content\": \"How many r's are in the word `strawberry`?\",\n",
+        "            \n",
+        "        }\n",
+        "    ],\n",
+        "    stream=True,\n",
+        ")\n",
+        "\n",
+        "for chunk in response:\n",
+        "    print(chunk)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## With images as input\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from litellm import completion\n",
+        "\n",
+        "# Set your Hugging Face Token\n",
+        "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
+        "\n",
+        "messages = [\n",
+        "    {\n",
+        "        \"role\": \"user\",\n",
+        "        \"content\": [\n",
+        "            {\"type\": \"text\", \"text\": \"What's in this image?\"},\n",
+        "            {\n",
+        "                \"type\": \"image_url\",\n",
+        "                \"image_url\": {\n",
+        "                    \"url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\",\n",
+        "                },\n",
+        "            },\n",
+        "        ],\n",
+        "    }\n",
+        "]\n",
+        "\n",
+        "response = completion(\n",
+        "    model=\"huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct\",\n",
+        "    messages=messages,\n",
+        ")\n",
+        "print(response.choices[0])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Tools - Function Calling\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "from litellm import completion\n",
+        "\n",
+        "\n",
+        "# Set your Hugging Face Token\n",
+        "os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
+        "\n",
+        "tools = [\n",
+        "    {\n",
+        "        \"type\": \"function\",\n",
+        "        \"function\": {\n",
+        "            \"name\": \"get_current_weather\",\n",
+        "            \"description\": \"Get the current weather in a given location\",\n",
+        "            \"parameters\": {\n",
+        "                \"type\": \"object\",\n",
+        "                \"properties\": {\n",
+        "                    \"location\": {\n",
+        "                        \"type\": \"string\",\n",
+        "                        \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
+        "                    },\n",
+        "                    \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
+        "                },\n",
+        "                \"required\": [\"location\"],\n",
+        "            },\n",
+        "        },\n",
+        "    }\n",
+        "]\n",
+        "messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n",
+        "\n",
+        "response = completion(\n",
+        "    model=\"huggingface/sambanova/meta-llama/Llama-3.1-8B-Instruct\", messages=messages, tools=tools, tool_choice=\"auto\"\n",
+        ")\n",
+        "print(response)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Hugging Face Dedicated Inference Endpoints\n",
+        "\n",
+        "Steps to use\n",
+        "\n",
+        "- Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n",
+        "- Set `api_base` to your deployed api base\n",
+        "- set the model to `huggingface/tgi` so that litellm knows it's a huggingface Deployed Inference Endpoint.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import litellm\n",
+        "\n",
+        "\n",
+        "response = litellm.completion(\n",
+        "    model=\"huggingface/tgi\",\n",
+        "    messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}],\n",
+        "    api_base=\"https://my-endpoint.endpoints.huggingface.cloud/v1/\",\n",
+        ")\n",
+        "print(response)"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": ".venv",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.0"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}