Added LiteLLM to the stack

2025-08-18 09:40:50 +00:00
parent 0648c1968c
commit d220b04e32
2682 changed files with 533609 additions and 1 deletion

@@ -0,0 +1,243 @@
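# # Memory-profiling harness for LiteLLM streaming: it replays a canned set of
# # chat.completion.chunk payloads through CustomStreamWrapper and re-assembles
# # the response 100 times so memory_profiler can flag growth between runs.
# # The whole file is checked in commented out; uncomment it to run the experiment.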
# import io
# import os
# import sys
# sys.path.insert(0, os.path.abspath("../.."))
# import litellm
# from memory_profiler import profile
# from litellm.utils import (
#     ModelResponseIterator,
#     ModelResponseListIterator,
#     CustomStreamWrapper,
# )
# from litellm.types.utils import ModelResponse, Choices, Message
# import time
# import pytest
# # @app.post("/debug")
# # async def debug(body: ExampleRequest) -> str:
# #     return await main_logic(body.query)
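# # Build a fixed sequence of chat.completion.chunk payloads (including two
# # Azure-style content-filter annotation chunks) and wrap them in a
# # ModelResponseListIterator so the stream can be replayed with no network calls.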
# def model_response_list_factory():
#     chunks = [
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [
#                 {
#                     "delta": {"content": "", "role": "assistant"},
#                     "finish_reason": None,
#                     "index": 0,
#                 }
#             ],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [
#                 {"delta": {"content": "This"}, "finish_reason": None, "index": 0}
#             ],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [
#                 {"delta": {"content": " is"}, "finish_reason": None, "index": 0}
#             ],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [
#                 {"delta": {"content": " a"}, "finish_reason": None, "index": 0}
#             ],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [
#                 {"delta": {"content": " dummy"}, "finish_reason": None, "index": 0}
#             ],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [
#                 {
#                     "delta": {"content": " response"},
#                     "finish_reason": None,
#                     "index": 0,
#                 }
#             ],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "",
#             "choices": [
#                 {
#                     "finish_reason": None,
#                     "index": 0,
#                     "content_filter_offsets": {
#                         "check_offset": 35159,
#                         "start_offset": 35159,
#                         "end_offset": 36150,
#                     },
#                     "content_filter_results": {
#                         "hate": {"filtered": False, "severity": "safe"},
#                         "self_harm": {"filtered": False, "severity": "safe"},
#                         "sexual": {"filtered": False, "severity": "safe"},
#                         "violence": {"filtered": False, "severity": "safe"},
#                     },
#                 }
#             ],
#             "created": 0,
#             "model": "",
#             "object": "",
#         },
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [{"delta": {"content": "."}, "finish_reason": None, "index": 0}],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
#             "choices": [{"delta": {}, "finish_reason": "stop", "index": 0}],
#             "created": 1716563849,
#             "model": "gpt-4o-2024-05-13",
#             "object": "chat.completion.chunk",
#             "system_fingerprint": "fp_5f4bad809a",
#         },
#         {
#             "id": "",
#             "choices": [
#                 {
#                     "finish_reason": None,
#                     "index": 0,
#                     "content_filter_offsets": {
#                         "check_offset": 36150,
#                         "start_offset": 36060,
#                         "end_offset": 37029,
#                     },
#                     "content_filter_results": {
#                         "hate": {"filtered": False, "severity": "safe"},
#                         "self_harm": {"filtered": False, "severity": "safe"},
#                         "sexual": {"filtered": False, "severity": "safe"},
#                         "violence": {"filtered": False, "severity": "safe"},
#                     },
#                 }
#             ],
#             "created": 0,
#             "model": "",
#             "object": "",
#         },
#     ]
#     chunk_list = []
#     for chunk in chunks:
#         new_chunk = litellm.ModelResponse(stream=True, id=chunk["id"])
#         if "choices" in chunk and isinstance(chunk["choices"], list):
#             new_choices = []
#             for choice in chunk["choices"]:
#                 if isinstance(choice, litellm.utils.StreamingChoices):
#                     _new_choice = choice
#                 elif isinstance(choice, dict):
#                     _new_choice = litellm.utils.StreamingChoices(**choice)
#                 new_choices.append(_new_choice)
#             new_chunk.choices = new_choices
#         chunk_list.append(new_chunk)
#     return ModelResponseListIterator(model_responses=chunk_list)
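# # mock_completion mimics litellm.acompletion(stream=True): it wraps the canned
# # chunk iterator in a CustomStreamWrapper with the "cached_response" provider,
# # so iterating it exercises litellm's streaming path without calling any model.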
# async def mock_completion(*args, **kwargs):
#     completion_stream = model_response_list_factory()
#     return litellm.CustomStreamWrapper(
#         completion_stream=completion_stream,
#         model="gpt-4-0613",
#         custom_llm_provider="cached_response",
#         logging_obj=litellm.Logging(
#             model="gpt-4-0613",
#             messages=[{"role": "user", "content": "Hey"}],
#             stream=True,
#             call_type="completion",
#             start_time=time.time(),
#             litellm_call_id="12345",
#             function_id="1245",
#         ),
#     )
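# # main_logic drains the mocked stream and concatenates the delta content.
# # The @profile decorator from memory_profiler prints line-by-line memory
# # usage for each run of the coroutine (memory_profiler must be installed).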
# @profile
# async def main_logic() -> str:
#     stream = await mock_completion()
#     result = ""
#     async for chunk in stream:
#         result += chunk.choices[0].delta.content or ""
#     return result
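# # Re-running the same flow many times makes any per-iteration memory growth
# # visible in the profiler output.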
# import asyncio
#
# for _ in range(100):
#     asyncio.run(main_logic())
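# # The pytest variant below is kept for reference: it runs main_logic once,
# # checks the assembled text, then parses the profiler output and asserts that
# # each reported increment stays under 1 MiB.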
# # @pytest.mark.asyncio
# # def test_memory_profile(capsys):
# #     # Run the async function
# #     result = asyncio.run(main_logic())
# #     # Verify the result
# #     assert result == "This is a dummy response."
# #     # Capture the output
# #     captured = capsys.readouterr()
# #     # Print memory output for debugging
# #     print("Memory Profiler Output:")
# #     print(f"captured out: {captured.out}")
# #     # Basic memory leak checks
# #     for idx, line in enumerate(captured.out.split("\n")):
# #         if idx % 2 == 0 and "MiB" in line:
# #             print(f"line: {line}")
# #     mem_lines = [line for line in captured.out.split("\n") if "MiB" in line]
# #     print(mem_lines)
# #     # Ensure we have some memory lines
# #     assert len(mem_lines) > 0, "No memory profiler output found"
# #     # Optional: Add more specific memory leak detection
# #     for line in mem_lines:
# #         # Extract memory increment
# #         parts = line.split()
# #         if len(parts) >= 3:
# #             try:
# #                 mem_increment = float(parts[2].replace("MiB", ""))
# #                 # Assert that memory increment is below a reasonable threshold
# #                 assert mem_increment < 1.0, f"Potential memory leak detected: {line}"
# #             except (ValueError, IndexError):
# #                 pass  # Skip lines that don't match expected format