Added LiteLLM to the stack

This commit is contained in:
2025-08-18 09:40:50 +00:00
parent 0648c1968c
commit d220b04e32
2682 changed files with 533609 additions and 1 deletion

View File

@@ -0,0 +1,262 @@
import json
import os
import sys
from typing import Optional
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# Adds the grandparent directory to sys.path to allow importing project modules
sys.path.insert(0, os.path.abspath("../.."))
from litellm.integrations.SlackAlerting.hanging_request_check import (
AlertingHangingRequestCheck,
)
from litellm.types.integrations.slack_alerting import HangingRequestData
class TestAlertingHangingRequestCheck:
    """Test suite for the AlertingHangingRequestCheck class."""

    @pytest.fixture
    def mock_slack_alerting(self):
        """Create a mock SlackAlerting object for testing."""
        mock_slack = MagicMock()
        mock_slack.alerting_threshold = 300  # 5 minutes
        mock_slack.send_alert = AsyncMock()
        return mock_slack

    @pytest.fixture
    def hanging_request_checker(self, mock_slack_alerting):
        """Create an AlertingHangingRequestCheck instance for testing."""
        return AlertingHangingRequestCheck(slack_alerting_object=mock_slack_alerting)

    @pytest.mark.asyncio
    async def test_init_creates_cache_with_correct_ttl(self, mock_slack_alerting):
        """
        Test that initialization creates a hanging request cache with correct TTL.

        The TTL should be alerting_threshold + buffer time.
        """
        checker = AlertingHangingRequestCheck(slack_alerting_object=mock_slack_alerting)

        # The cache should be created with TTL = alerting_threshold + buffer time
        expected_ttl = (
            mock_slack_alerting.alerting_threshold + 60
        )  # HANGING_ALERT_BUFFER_TIME_SECONDS
        assert checker.hanging_request_cache.default_ttl == expected_ttl

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_success(
        self, hanging_request_checker
    ):
        """
        Test successfully adding a request to the hanging request cache.

        Should extract metadata and store HangingRequestData in cache.
        """
        request_data = {
            "litellm_call_id": "test_request_123",
            "model": "gpt-4",
            "deployment": {"litellm_params": {"api_base": "https://api.openai.com/v1"}},
            "metadata": {
                "user_api_key_alias": "test_key",
                "user_api_key_team_alias": "test_team",
            },
        }

        with patch("litellm.get_api_base", return_value="https://api.openai.com/v1"):
            await hanging_request_checker.add_request_to_hanging_request_check(
                request_data
            )

        # Verify the request was added to cache
        cached_data = (
            await hanging_request_checker.hanging_request_cache.async_get_cache(
                key="test_request_123"
            )
        )
        assert cached_data is not None
        assert isinstance(cached_data, HangingRequestData)
        assert cached_data.request_id == "test_request_123"
        assert cached_data.model == "gpt-4"
        assert cached_data.api_base == "https://api.openai.com/v1"

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_none_request_data(
        self, hanging_request_checker
    ):
        """
        Test that passing None request_data returns early without error.

        Should handle gracefully when no request data is provided.
        """
        result = await hanging_request_checker.add_request_to_hanging_request_check(
            None
        )
        assert result is None

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_minimal_data(
        self, hanging_request_checker
    ):
        """
        Test adding request with minimal required data.

        Should handle cases where optional fields are missing.
        """
        request_data = {
            "litellm_call_id": "minimal_request_456",
            "model": "gpt-3.5-turbo",
        }

        await hanging_request_checker.add_request_to_hanging_request_check(request_data)

        cached_data = (
            await hanging_request_checker.hanging_request_cache.async_get_cache(
                key="minimal_request_456"
            )
        )
        assert cached_data is not None
        assert cached_data.request_id == "minimal_request_456"
        assert cached_data.model == "gpt-3.5-turbo"
        # Optional fields fall back to None / empty string
        assert cached_data.api_base is None
        assert cached_data.key_alias == ""
        assert cached_data.team_alias == ""

    @pytest.mark.asyncio
    async def test_send_hanging_request_alert(self, hanging_request_checker):
        """
        Test sending a hanging request alert.

        Should format the alert message correctly and call slack alerting.
        """
        hanging_request_data = HangingRequestData(
            request_id="test_hanging_request",
            model="gpt-4",
            api_base="https://api.openai.com/v1",
            key_alias="test_key",
            team_alias="test_team",
        )

        await hanging_request_checker.send_hanging_request_alert(hanging_request_data)

        # Verify slack alert was called
        hanging_request_checker.slack_alerting_object.send_alert.assert_called_once()

        # Check the alert message format. Use call_args.kwargs rather than the
        # fragile positional call_args[1] indexing (documented mock accessor).
        call_args = hanging_request_checker.slack_alerting_object.send_alert.call_args
        message = call_args.kwargs["message"]
        assert "Requests are hanging - 300s+ request time" in message
        assert "Request Model: `gpt-4`" in message
        assert "API Base: `https://api.openai.com/v1`" in message
        assert "Key Alias: `test_key`" in message
        assert "Team Alias: `test_team`" in message
        assert call_args.kwargs["level"] == "Medium"

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_no_proxy_logging(
        self, hanging_request_checker
    ):
        """
        Test send_alerts_for_hanging_requests when proxy_logging_obj.internal_usage_cache is None.

        Should return early without processing when internal usage cache is unavailable.
        """
        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
            mock_proxy.internal_usage_cache = None

            result = await hanging_request_checker.send_alerts_for_hanging_requests()
            assert result is None

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_completed_request(
        self, hanging_request_checker
    ):
        """
        Test send_alerts_for_hanging_requests when request has completed (not hanging).

        Should remove completed requests from cache and not send alerts.
        """
        # Add a request to the hanging cache
        hanging_data = HangingRequestData(
            request_id="completed_request_789",
            model="gpt-4",
            api_base="https://api.openai.com/v1",
        )
        await hanging_request_checker.hanging_request_cache.async_set_cache(
            key="completed_request_789", value=hanging_data, ttl=300
        )

        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
            # Mock internal usage cache to return a request status (meaning request completed)
            mock_internal_cache = AsyncMock()
            mock_internal_cache.async_get_cache.return_value = {"status": "success"}
            mock_proxy.internal_usage_cache = mock_internal_cache

            # Mock the cache method to return our test request
            hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
                AsyncMock(return_value=["completed_request_789"])
            )

            await hanging_request_checker.send_alerts_for_hanging_requests()

            # Verify no alert was sent since request completed
            hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_actual_hanging_request(
        self, hanging_request_checker
    ):
        """
        Test send_alerts_for_hanging_requests when request is actually hanging.

        Should send alert for requests that haven't completed within threshold.
        """
        # Add a hanging request to the cache
        hanging_data = HangingRequestData(
            request_id="hanging_request_999",
            model="gpt-4",
            api_base="https://api.openai.com/v1",
            key_alias="test_key",
            team_alias="test_team",
        )
        await hanging_request_checker.hanging_request_cache.async_set_cache(
            key="hanging_request_999", value=hanging_data, ttl=300
        )

        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
            # Mock internal usage cache to return None (meaning request is still hanging)
            mock_internal_cache = AsyncMock()
            mock_internal_cache.async_get_cache.return_value = None
            mock_proxy.internal_usage_cache = mock_internal_cache

            # Mock the cache method to return our test request
            hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
                AsyncMock(return_value=["hanging_request_999"])
            )

            await hanging_request_checker.send_alerts_for_hanging_requests()

            # Verify alert was sent for hanging request
            hanging_request_checker.slack_alerting_object.send_alert.assert_called_once()

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_missing_hanging_data(
        self, hanging_request_checker
    ):
        """
        Test send_alerts_for_hanging_requests when hanging request data is missing from cache.

        Should continue processing other requests when individual request data is missing.
        """
        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy:
            mock_internal_cache = AsyncMock()
            mock_proxy.internal_usage_cache = mock_internal_cache

            # Mock cache to return request ID but no data (simulating expired or missing data)
            hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
                AsyncMock(return_value=["missing_request_111"])
            )
            hanging_request_checker.hanging_request_cache.async_get_cache = AsyncMock(
                return_value=None
            )

            await hanging_request_checker.send_alerts_for_hanging_requests()

            # Should not crash and should not send any alerts
            hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()

View File

@@ -0,0 +1,174 @@
import datetime
import json
import os
import sys
import unittest
from typing import List, Optional, Tuple
from unittest.mock import ANY, AsyncMock, MagicMock, Mock, patch
sys.path.insert(
0, os.path.abspath("../../..")
)  # Adds the repository root (three directory levels up) to sys.path
import litellm
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
from litellm.proxy._types import CallInfo, Litellm_EntityType
class TestSlackAlerting(unittest.TestCase):
    """Unit tests for SlackAlerting budget / threshold event helpers."""

    def setUp(self):
        self.slack_alerting = SlackAlerting()

    def test_get_percent_of_max_budget_left(self):
        # Test case 1: When max_budget is None
        user_info = CallInfo(
            max_budget=None, spend=50.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.0)

        # Test case 2: When max_budget is 0
        user_info = CallInfo(
            max_budget=0.0, spend=50.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.0)

        # Test case 3: When spend is less than max_budget
        user_info = CallInfo(
            max_budget=100.0, spend=75.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.25)

        # Test case 4: When spend equals max_budget
        user_info = CallInfo(
            max_budget=100.0, spend=100.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.0)

        # Test case 5: When spend exceeds max_budget (negative percent left)
        user_info = CallInfo(
            max_budget=100.0, spend=120.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, -0.2)

    def test_get_event_and_event_message_max_budget(self):
        # Initial setup with no event
        event = None
        event_message = "Test Message: "

        # Test case 1: When spend exceeds max_budget
        user_info = CallInfo(
            max_budget=100.0,
            spend=120.0,
            soft_budget=None,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "budget_crossed")
        self.assertIn("Budget Crossed", event_message)

        # Test case 2: When 5% of max_budget is left
        user_info = CallInfo(
            max_budget=100.0,
            spend=95.0,
            soft_budget=None,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "threshold_crossed")
        self.assertIn("5% Threshold Crossed", event_message)

        # Test case 3: When 15% of max_budget is left
        user_info = CallInfo(
            max_budget=100.0,
            spend=85.0,
            soft_budget=None,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "threshold_crossed")
        self.assertIn("15% Threshold Crossed", event_message)

    def test_get_event_and_event_message_soft_budget(self):
        # Initial setup with no event
        event = None
        event_message = "Test Message: "

        # Test case 1: When spend exceeds soft_budget
        user_info = CallInfo(
            max_budget=None,
            spend=120.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "soft_budget_crossed")
        self.assertIn("Total Soft Budget", event_message)

        # Test case 2: When spend is less than soft_budget
        user_info = CallInfo(
            max_budget=None,
            spend=90.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=None, event_message=event_message
        )
        self.assertIsNone(event)  # No event should be triggered

    def test_get_event_and_event_message_both_budgets(self):
        # Initial setup with no event
        event = None
        event_message = "Test Message: "

        # Test case 1: When spend exceeds both max_budget and soft_budget
        user_info = CallInfo(
            max_budget=150.0,
            spend=160.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        # budget_crossed has higher priority
        self.assertEqual(event, "budget_crossed")
        self.assertIn("Budget Crossed", event_message)

        # Test case 2: When spend exceeds soft_budget but not max_budget
        user_info = CallInfo(
            max_budget=150.0,
            spend=120.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "soft_budget_crossed")
        self.assertIn("Total Soft Budget", event_message)

    # Calling update_values with alerting args should try to start the periodic task
    @patch("asyncio.create_task")
    def test_update_values_starts_periodic_task(self, mock_create_task):
        # Make create_task a no-op returning a dummy awaitable
        mock_create_task.return_value = AsyncMock()  # prevents awaiting errors

        self.assertFalse(self.slack_alerting.periodic_started)
        self.slack_alerting.update_values(alerting_args={"slack_alerting": "True"})
        self.assertTrue(self.slack_alerting.periodic_started)

View File

@@ -0,0 +1,39 @@
import json
import os
import sys
from typing import Optional
from unittest.mock import MagicMock
import pytest
# Adds the grandparent directory to sys.path to allow importing project modules
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.langfuse.langfuse_prompt_management import (
LangfusePromptManagement,
)
from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager
@pytest.mark.asyncio
async def test_langfuse_not_initialized_returns_none_early():
    """
    Test that when no LangfusePromptManagement callback is initialized,
    _add_langfuse_trace_id_to_alert returns None immediately without
    using the request data's logging object.
    """
    # Ensure no Langfuse logger is registered in the callback manager
    litellm.logging_callback_manager = LoggingCallbackManager()

    # Create request data that would normally trigger processing
    request_data = {"litellm_logging_obj": MagicMock(), "trace_id": "test-trace-id"}

    result = await _add_langfuse_trace_id_to_alert(request_data)

    # Should return None early without processing request_data
    assert result is None

    # The early return means the logging object was never used. Check
    # mock_calls (which records direct calls AND method/attribute calls),
    # not just assert_not_called() — the latter only covers direct calls
    # to the mock object itself.
    assert request_data["litellm_logging_obj"].mock_calls == []