Added LiteLLM to the stack
This commit is contained in:
@@ -0,0 +1,262 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# Adds the grandparent directory to sys.path to allow importing project modules
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
|
||||
from litellm.integrations.SlackAlerting.hanging_request_check import (
|
||||
AlertingHangingRequestCheck,
|
||||
)
|
||||
from litellm.types.integrations.slack_alerting import HangingRequestData
|
||||
|
||||
|
||||
class TestAlertingHangingRequestCheck:
    """Unit tests for the AlertingHangingRequestCheck helper."""

    @pytest.fixture
    def mock_slack_alerting(self):
        """Build a stand-in SlackAlerting object with an async send_alert."""
        slack_stub = MagicMock()
        slack_stub.alerting_threshold = 300  # five minutes
        slack_stub.send_alert = AsyncMock()
        return slack_stub

    @pytest.fixture
    def hanging_request_checker(self, mock_slack_alerting):
        """An AlertingHangingRequestCheck wired to the mocked SlackAlerting."""
        return AlertingHangingRequestCheck(slack_alerting_object=mock_slack_alerting)

    @pytest.mark.asyncio
    async def test_init_creates_cache_with_correct_ttl(self, mock_slack_alerting):
        """
        The hanging-request cache TTL must equal the alerting threshold
        plus the buffer time (HANGING_ALERT_BUFFER_TIME_SECONDS == 60).
        """
        instance = AlertingHangingRequestCheck(
            slack_alerting_object=mock_slack_alerting
        )

        buffer_seconds = 60  # HANGING_ALERT_BUFFER_TIME_SECONDS
        assert (
            instance.hanging_request_cache.default_ttl
            == mock_slack_alerting.alerting_threshold + buffer_seconds
        )

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_success(
        self, hanging_request_checker
    ):
        """
        A fully populated request should have its metadata extracted and
        be stored in the cache as a HangingRequestData instance.
        """
        payload = {
            "litellm_call_id": "test_request_123",
            "model": "gpt-4",
            "deployment": {"litellm_params": {"api_base": "https://api.openai.com/v1"}},
            "metadata": {
                "user_api_key_alias": "test_key",
                "user_api_key_team_alias": "test_team",
            },
        }

        with patch("litellm.get_api_base", return_value="https://api.openai.com/v1"):
            await hanging_request_checker.add_request_to_hanging_request_check(payload)

        # The request must now be retrievable from the hanging-request cache.
        stored = await hanging_request_checker.hanging_request_cache.async_get_cache(
            key="test_request_123"
        )

        assert stored is not None
        assert isinstance(stored, HangingRequestData)
        assert stored.request_id == "test_request_123"
        assert stored.model == "gpt-4"
        assert stored.api_base == "https://api.openai.com/v1"

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_none_request_data(
        self, hanging_request_checker
    ):
        """Passing None request data must be a graceful no-op returning None."""
        outcome = await hanging_request_checker.add_request_to_hanging_request_check(
            None
        )
        assert outcome is None

    @pytest.mark.asyncio
    async def test_add_request_to_hanging_request_check_minimal_data(
        self, hanging_request_checker
    ):
        """
        With only the call id and model supplied, the optional fields should
        fall back to their defaults (None api_base, empty aliases).
        """
        payload = {
            "litellm_call_id": "minimal_request_456",
            "model": "gpt-3.5-turbo",
        }

        await hanging_request_checker.add_request_to_hanging_request_check(payload)

        stored = await hanging_request_checker.hanging_request_cache.async_get_cache(
            key="minimal_request_456"
        )

        assert stored is not None
        assert stored.request_id == "minimal_request_456"
        assert stored.model == "gpt-3.5-turbo"
        assert stored.api_base is None
        assert stored.key_alias == ""
        assert stored.team_alias == ""

    @pytest.mark.asyncio
    async def test_send_hanging_request_alert(self, hanging_request_checker):
        """
        The alert message must embed the model, api base and aliases, and the
        alert must go out at level "Medium" through the slack object.
        """
        alert_payload = HangingRequestData(
            request_id="test_hanging_request",
            model="gpt-4",
            api_base="https://api.openai.com/v1",
            key_alias="test_key",
            team_alias="test_team",
        )

        await hanging_request_checker.send_hanging_request_alert(alert_payload)

        send_alert_mock = hanging_request_checker.slack_alerting_object.send_alert
        send_alert_mock.assert_called_once()

        # Inspect the keyword arguments of the single send_alert call.
        kwargs = send_alert_mock.call_args[1]
        alert_text = kwargs["message"]

        for expected_fragment in (
            "Requests are hanging - 300s+ request time",
            "Request Model: `gpt-4`",
            "API Base: `https://api.openai.com/v1`",
            "Key Alias: `test_key`",
            "Team Alias: `test_team`",
        ):
            assert expected_fragment in alert_text
        assert kwargs["level"] == "Medium"

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_no_proxy_logging(
        self, hanging_request_checker
    ):
        """
        When proxy_logging_obj.internal_usage_cache is None the checker must
        return early (None) without attempting any processing.
        """
        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_mock:
            proxy_mock.internal_usage_cache = None

            outcome = await hanging_request_checker.send_alerts_for_hanging_requests()
            assert outcome is None

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_completed_request(
        self, hanging_request_checker
    ):
        """
        A request that has already completed must not produce an alert;
        it is simply dropped from the hanging-request cache.
        """
        # Seed the hanging cache with one request.
        entry = HangingRequestData(
            request_id="completed_request_789",
            model="gpt-4",
            api_base="https://api.openai.com/v1",
        )
        await hanging_request_checker.hanging_request_cache.async_set_cache(
            key="completed_request_789", value=entry, ttl=300
        )

        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_mock:
            # A stored status means the request finished (it is not hanging).
            usage_cache_mock = AsyncMock()
            usage_cache_mock.async_get_cache.return_value = {"status": "success"}
            proxy_mock.internal_usage_cache = usage_cache_mock

            # Have the hanging cache report our seeded request id.
            hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
                AsyncMock(return_value=["completed_request_789"])
            )

            await hanging_request_checker.send_alerts_for_hanging_requests()

            # Completed request -> no alert.
            hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_actual_hanging_request(
        self, hanging_request_checker
    ):
        """
        A request with no completion status recorded within the threshold
        is considered hanging and must trigger exactly one alert.
        """
        entry = HangingRequestData(
            request_id="hanging_request_999",
            model="gpt-4",
            api_base="https://api.openai.com/v1",
            key_alias="test_key",
            team_alias="test_team",
        )
        await hanging_request_checker.hanging_request_cache.async_set_cache(
            key="hanging_request_999", value=entry, ttl=300
        )

        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_mock:
            # No status in the internal usage cache -> still hanging.
            usage_cache_mock = AsyncMock()
            usage_cache_mock.async_get_cache.return_value = None
            proxy_mock.internal_usage_cache = usage_cache_mock

            hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
                AsyncMock(return_value=["hanging_request_999"])
            )

            await hanging_request_checker.send_alerts_for_hanging_requests()

            # Hanging request -> one alert.
            hanging_request_checker.slack_alerting_object.send_alert.assert_called_once()

    @pytest.mark.asyncio
    async def test_send_alerts_for_hanging_requests_with_missing_hanging_data(
        self, hanging_request_checker
    ):
        """
        If a request id is listed but its cached data has expired or vanished,
        processing must continue without crashing and without alerting.
        """
        with patch("litellm.proxy.proxy_server.proxy_logging_obj") as proxy_mock:
            usage_cache_mock = AsyncMock()
            proxy_mock.internal_usage_cache = usage_cache_mock

            # Cache lists an id but returns no data for it (expired/missing).
            hanging_request_checker.hanging_request_cache.async_get_oldest_n_keys = (
                AsyncMock(return_value=["missing_request_111"])
            )
            hanging_request_checker.hanging_request_cache.async_get_cache = AsyncMock(
                return_value=None
            )

            await hanging_request_checker.send_alerts_for_hanging_requests()

            # Missing data must neither crash nor alert.
            hanging_request_checker.slack_alerting_object.send_alert.assert_not_called()
|
@@ -0,0 +1,174 @@
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from typing import List, Optional, Tuple
|
||||
from unittest.mock import ANY, AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
) # Adds the parent directory to the system-path
|
||||
import litellm
|
||||
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
|
||||
from litellm.proxy._types import CallInfo, Litellm_EntityType
|
||||
|
||||
|
||||
class TestSlackAlerting(unittest.TestCase):
    """Unit tests for SlackAlerting budget-percentage and event helpers."""

    def setUp(self):
        # Fresh SlackAlerting instance per test to avoid cross-test state.
        self.slack_alerting = SlackAlerting()

    def test_get_percent_of_max_budget_left(self):
        """_get_percent_of_max_budget_left across the budget boundary cases."""
        # Case 1: max_budget is None -> nothing to compute, report 0.0
        user_info = CallInfo(
            max_budget=None, spend=50.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.0)

        # Case 2: max_budget is 0 -> avoid division by zero, report 0.0
        user_info = CallInfo(
            max_budget=0.0, spend=50.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.0)

        # Case 3: spend below max_budget -> positive fraction remaining
        user_info = CallInfo(
            max_budget=100.0, spend=75.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.25)

        # Case 4: spend equals max_budget -> exactly nothing left
        user_info = CallInfo(
            max_budget=100.0, spend=100.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, 0.0)

        # Case 5: spend exceeds max_budget -> negative fraction (overspend)
        user_info = CallInfo(
            max_budget=100.0, spend=120.0, event_group=Litellm_EntityType.KEY
        )
        result = self.slack_alerting._get_percent_of_max_budget_left(user_info)
        self.assertEqual(result, -0.2)

    def test_get_event_and_event_message_max_budget(self):
        """Max-budget events: crossed, 5% left, and 15% left thresholds."""
        # Initial setup with no event
        event = None
        event_message = "Test Message: "

        # Case 1: spend exceeds max_budget -> budget_crossed
        user_info = CallInfo(
            max_budget=100.0,
            spend=120.0,
            soft_budget=None,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "budget_crossed")
        self.assertIn("Budget Crossed", event_message)

        # Case 2: only 5% of max_budget left -> threshold_crossed (5%)
        user_info = CallInfo(
            max_budget=100.0,
            spend=95.0,
            soft_budget=None,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "threshold_crossed")
        self.assertIn("5% Threshold Crossed", event_message)

        # Case 3: 15% of max_budget left -> threshold_crossed (15%)
        user_info = CallInfo(
            max_budget=100.0,
            spend=85.0,
            soft_budget=None,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "threshold_crossed")
        self.assertIn("15% Threshold Crossed", event_message)

    def test_get_event_and_event_message_soft_budget(self):
        """Soft-budget events: crossed vs. still under the soft limit."""
        # Initial setup with no event
        event = None
        event_message = "Test Message: "

        # Case 1: spend exceeds soft_budget -> soft_budget_crossed
        user_info = CallInfo(
            max_budget=None,
            spend=120.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "soft_budget_crossed")
        self.assertIn("Total Soft Budget", event_message)

        # Case 2: spend below soft_budget -> no event should fire
        user_info = CallInfo(
            max_budget=None,
            spend=90.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=None, event_message=event_message
        )
        print("got event", event)
        print("got event_message", event_message)
        self.assertIsNone(event)  # No event should be triggered

    def test_get_event_and_event_message_both_budgets(self):
        """With both budgets set, budget_crossed outranks soft_budget_crossed."""
        # Initial setup with no event
        event = None
        event_message = "Test Message: "

        # Case 1: spend exceeds both budgets -> budget_crossed wins
        user_info = CallInfo(
            max_budget=150.0,
            spend=160.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        # budget_crossed has higher priority
        self.assertEqual(event, "budget_crossed")
        self.assertIn("Budget Crossed", event_message)

        # Case 2: spend exceeds only soft_budget -> soft_budget_crossed
        user_info = CallInfo(
            max_budget=150.0,
            spend=120.0,
            soft_budget=100.0,
            event_group=Litellm_EntityType.KEY,
        )
        event, event_message = self.slack_alerting._get_event_and_event_message(
            user_info=user_info, event=event, event_message=event_message
        )
        self.assertEqual(event, "soft_budget_crossed")
        self.assertIn("Total Soft Budget", event_message)

    @patch("asyncio.create_task")
    def test_update_values_starts_periodic_task(self, mock_create_task):
        """Calling update_values with alerting args starts the periodic task."""
        # Return a dummy awaitable so nothing is actually scheduled.
        mock_create_task.return_value = AsyncMock()  # prevents awaiting errors

        # Use unittest assertions (not bare asserts) for consistent reporting.
        self.assertFalse(self.slack_alerting.periodic_started)

        self.slack_alerting.update_values(alerting_args={"slack_alerting": "True"})
        self.assertTrue(self.slack_alerting.periodic_started)
|
@@ -0,0 +1,39 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
# Adds the grandparent directory to sys.path to allow importing project modules
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
|
||||
import litellm
|
||||
from litellm.integrations.langfuse.langfuse_prompt_management import (
|
||||
LangfusePromptManagement,
|
||||
)
|
||||
from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
|
||||
from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_langfuse_not_initialized_returns_none_early():
    """
    When no LangfusePromptManagement callback is registered,
    _add_langfuse_trace_id_to_alert must return None immediately
    without invoking anything on the request's logging object.
    """
    # Reset the callback manager so no Langfuse logger is registered.
    litellm.logging_callback_manager = LoggingCallbackManager()

    # Request data that would normally trigger trace-id processing.
    logging_obj_mock = MagicMock()
    request_data = {"litellm_logging_obj": logging_obj_mock, "trace_id": "test-trace-id"}

    result = await _add_langfuse_trace_id_to_alert(request_data)

    # Early return: no value produced.
    assert result is None

    # Early return also means no call was made on (or through) the logging
    # object. Checking mock_calls covers child/method calls too, which the
    # original assert_not_called() would have silently missed.
    assert logging_obj_mock.mock_calls == []
|
Reference in New Issue
Block a user