mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 16:18:30 +00:00
- Create LLMQuotaLimitExceededException following CrewAI's existing pattern
- Add quota limit error handling in both streaming and non-streaming LLM calls
- Update error handling in agent execution and crew agent executor
- Add comprehensive tests for quota limit scenarios
- Fixes issue #3434: Handle RateLimitError gracefully instead of crashing

The implementation catches litellm.exceptions.RateLimitError and converts it to a CrewAI-specific exception, allowing tasks to detect quota limits and shut down gracefully instead of crashing with unhandled exceptions.

Co-Authored-By: João <joao@crewai.com>
185 lines
7.7 KiB
Python
185 lines
7.7 KiB
Python
import pytest
|
|
from unittest.mock import Mock, patch
|
|
from litellm.exceptions import RateLimitError
|
|
|
|
from crewai.llm import LLM
|
|
from crewai.agent import Agent
|
|
from crewai.task import Task
|
|
from crewai.crew import Crew
|
|
from crewai.utilities.exceptions import LLMQuotaLimitExceededException
|
|
|
|
|
|
class TestQuotaLimitHandling:
    """Test suite for quota limit handling in CrewAI.

    The tests cover three layers:

    * ``LLM.call`` converting ``litellm.exceptions.RateLimitError`` into
      ``LLMQuotaLimitExceededException`` (streaming and non-streaming).
    * ``Agent`` / ``Crew`` / ``CrewAgentExecutor`` letting that exception
      propagate to the caller.
    * ``LLMQuotaLimitExceededException`` itself (message format, preserved
      original message, keyword detection).
    """

    # Model name shared by every LLM built in this suite.
    _MODEL = "gpt-3.5-turbo"
    # Provider name handed to litellm exception constructors.
    _PROVIDER = "openai"
    # Minimal chat payload used for every ``LLM.call``.
    _MESSAGES = [{"role": "user", "content": "Hello"}]

    @classmethod
    def _rate_limit_error(cls, message):
        """Build a litellm ``RateLimitError`` carrying *message*.

        litellm's exception constructors require ``llm_provider`` and
        ``model`` in addition to the message; passing only the message
        raises ``TypeError`` before the code under test ever runs.
        """
        return RateLimitError(
            message=message,
            llm_provider=cls._PROVIDER,
            model=cls._MODEL,
        )

    @classmethod
    def _make_agent(cls):
        """Return a minimal agent backed by a real (un-mocked) ``LLM`` object."""
        return Agent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model=cls._MODEL),
        )

    def test_llm_non_streaming_quota_limit_exception(self):
        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in non-streaming mode."""
        llm = LLM(model=self._MODEL, stream=False)

        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = self._rate_limit_error("Rate limit exceeded")

            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                llm.call(messages=self._MESSAGES)

        # Both the CrewAI prefix and the provider's original text must survive.
        assert "quota limit exceeded" in str(exc_info.value).lower()
        assert "Rate limit exceeded" in str(exc_info.value)

    def test_llm_streaming_quota_limit_exception(self):
        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in streaming mode."""
        llm = LLM(model=self._MODEL, stream=True)

        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = self._rate_limit_error("API quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                llm.call(messages=self._MESSAGES)

        assert "quota limit exceeded" in str(exc_info.value).lower()
        assert "API quota exceeded" in str(exc_info.value)

    def test_agent_handles_quota_limit_gracefully(self):
        """Test that Agent handles quota limit exceptions gracefully."""
        agent = self._make_agent()
        # ``expected_output`` is a required Task field; without it the Task
        # fails validation before the quota path is exercised.
        task = Task(
            description="Test task",
            expected_output="Test output",
            agent=agent,
        )

        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException("Quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException):
                agent.execute_task(task=task, context="Test context")

    def test_crew_handles_quota_limit_in_task_execution(self):
        """Test that Crew handles quota limit exceptions during task execution."""
        agent = self._make_agent()
        task = Task(
            description="Test task",
            expected_output="Test output",
            agent=agent,
        )
        crew = Crew(agents=[agent], tasks=[task])

        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException("Monthly quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException):
                crew.kickoff()

    def test_quota_limit_exception_error_message_format(self):
        """Test that LLMQuotaLimitExceededException formats error messages correctly."""
        original_error = "Resource exhausted: Quota exceeded for requests per day"
        exception = LLMQuotaLimitExceededException(original_error)

        error_message = str(exception)
        assert "LLM quota limit exceeded" in error_message
        assert original_error in error_message
        assert "API quota or rate limit has been reached" in error_message
        assert "upgrade your plan" in error_message

    def test_quota_limit_exception_preserves_original_error(self):
        """Test that LLMQuotaLimitExceededException preserves the original error message."""
        original_error = "429 Too Many Requests: Rate limit exceeded"
        exception = LLMQuotaLimitExceededException(original_error)

        assert exception.original_error_message == original_error

    @pytest.mark.parametrize("error_message,should_match", [
        ("quota exceeded", True),
        ("rate limit exceeded", True),
        ("resource exhausted", True),
        ("too many requests", True),
        ("quota limit reached", True),
        ("api quota exceeded", True),
        ("usage limit exceeded", True),
        ("billing quota exceeded", True),
        ("request limit exceeded", True),
        ("daily quota exceeded", True),
        ("monthly quota exceeded", True),
        ("QUOTA EXCEEDED", True),  # Case insensitive
        ("Rate Limit Exceeded", True),  # Case insensitive
        ("some other error", False),
        ("network timeout", False),
    ])
    def test_quota_limit_error_detection(self, error_message, should_match):
        """Test that quota limit error detection works for various error messages."""
        exception = LLMQuotaLimitExceededException(error_message)
        assert exception._is_quota_limit_error(error_message) == should_match

    def test_different_provider_quota_errors(self):
        """Test quota limit handling for different LLM providers."""
        # Sample rate-limit messages in several wordings.
        test_cases = [
            "Rate limit reached for requests",
            "rate_limit_error: Number of requests per minute exceeded",
            "RESOURCE_EXHAUSTED: Quota exceeded",
            "429 Too Many Requests",
        ]

        llm = LLM(model=self._MODEL)

        for error_message in test_cases:
            with patch("litellm.completion") as mock_completion:
                mock_completion.side_effect = self._rate_limit_error(error_message)

                with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                    llm.call(messages=self._MESSAGES)

            assert error_message in str(exc_info.value)

    def test_quota_limit_vs_context_window_exceptions(self):
        """Test that quota limit and context window exceptions are handled separately."""
        from litellm.exceptions import ContextWindowExceededError
        from crewai.utilities.exceptions import LLMContextLengthExceededException

        llm = LLM(model=self._MODEL)

        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = self._rate_limit_error("Quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException):
                llm.call(messages=self._MESSAGES)

        with patch("litellm.completion") as mock_completion:
            # Like RateLimitError, this litellm exception needs the model and
            # provider in addition to the message.
            mock_completion.side_effect = ContextWindowExceededError(
                message="Context length exceeded",
                model=self._MODEL,
                llm_provider=self._PROVIDER,
            )

            with pytest.raises(LLMContextLengthExceededException):
                llm.call(messages=self._MESSAGES)

    def test_quota_limit_exception_in_crew_agent_executor(self):
        """Test that CrewAgentExecutor handles quota limit exceptions properly."""
        from crewai.agents.crew_agent_executor import CrewAgentExecutor

        agent = self._make_agent()

        # NOTE(review): the executor is built from ``agent`` alone, exactly as
        # in the original test. Confirm CrewAgentExecutor's constructor accepts
        # this; if it requires more arguments (llm, task, crew, prompt, ...)
        # this test fails during setup rather than on the quota path.
        executor = CrewAgentExecutor(agent=agent)

        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException("Daily quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException):
                executor.invoke({
                    "input": "Test input",
                    "chat_history": [],
                    "agent_scratchpad": "",
                })