mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 16:18:30 +00:00
feat: Add graceful quota limit handling for LLM APIs
- Create LLMQuotaLimitExceededException following CrewAI's existing pattern
- Add quota limit error handling in both streaming and non-streaming LLM calls
- Update error handling in agent execution and crew agent executor
- Add comprehensive tests for quota limit scenarios
- Fixes issue #3434: Handle RateLimitError gracefully instead of crashing

The implementation catches litellm.exceptions.RateLimitError and converts it to a CrewAI-specific exception, allowing tasks to detect quota limits and shut down gracefully instead of crashing with unhandled exceptions.

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -48,6 +48,10 @@ from crewai.events.types.memory_events import (
|
|||||||
MemoryRetrievalStartedEvent,
|
MemoryRetrievalStartedEvent,
|
||||||
MemoryRetrievalCompletedEvent,
|
MemoryRetrievalCompletedEvent,
|
||||||
)
|
)
|
||||||
|
from crewai.utilities.exceptions import (
|
||||||
|
LLMContextLengthExceededException,
|
||||||
|
LLMQuotaLimitExceededException,
|
||||||
|
)
|
||||||
from crewai.events.types.knowledge_events import (
|
from crewai.events.types.knowledge_events import (
|
||||||
KnowledgeQueryCompletedEvent,
|
KnowledgeQueryCompletedEvent,
|
||||||
KnowledgeQueryFailedEvent,
|
KnowledgeQueryFailedEvent,
|
||||||
@@ -461,6 +465,26 @@ class Agent(BaseAgent):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
except LLMContextLengthExceededException as e:
|
||||||
|
crewai_event_bus.emit(
|
||||||
|
self,
|
||||||
|
event=AgentExecutionErrorEvent(
|
||||||
|
agent=self,
|
||||||
|
task=task,
|
||||||
|
error=str(e),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
except LLMQuotaLimitExceededException as e:
|
||||||
|
crewai_event_bus.emit(
|
||||||
|
self,
|
||||||
|
event=AgentExecutionErrorEvent(
|
||||||
|
agent=self,
|
||||||
|
task=task,
|
||||||
|
error=str(e),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if e.__class__.__module__.startswith("litellm"):
|
if e.__class__.__module__.startswith("litellm"):
|
||||||
# Do not retry on litellm errors
|
# Do not retry on litellm errors
|
||||||
|
|||||||
@@ -35,6 +35,10 @@ from crewai.events.types.logging_events import (
|
|||||||
AgentLogsExecutionEvent,
|
AgentLogsExecutionEvent,
|
||||||
)
|
)
|
||||||
from crewai.events.event_bus import crewai_event_bus
|
from crewai.events.event_bus import crewai_event_bus
|
||||||
|
from crewai.utilities.exceptions import (
|
||||||
|
LLMContextLengthExceededException,
|
||||||
|
LLMQuotaLimitExceededException,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrewAgentExecutor(CrewAgentExecutorMixin):
|
class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||||
@@ -201,6 +205,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
|||||||
printer=self._printer,
|
printer=self._printer,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
except LLMContextLengthExceededException as e:
|
||||||
|
raise e
|
||||||
|
except LLMQuotaLimitExceededException as e:
|
||||||
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if e.__class__.__module__.startswith("litellm"):
|
if e.__class__.__module__.startswith("litellm"):
|
||||||
# Do not retry on litellm errors
|
# Do not retry on litellm errors
|
||||||
|
|||||||
@@ -35,12 +35,16 @@ from crewai.events.types.tool_usage_events import (
|
|||||||
ToolUsageFinishedEvent,
|
ToolUsageFinishedEvent,
|
||||||
ToolUsageErrorEvent,
|
ToolUsageErrorEvent,
|
||||||
)
|
)
|
||||||
|
from crewai.utilities.exceptions import (
|
||||||
|
LLMContextLengthExceededException,
|
||||||
|
LLMQuotaLimitExceededException,
|
||||||
|
)
|
||||||
|
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.simplefilter("ignore", UserWarning)
|
warnings.simplefilter("ignore", UserWarning)
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import Choices
|
from litellm import Choices
|
||||||
from litellm.exceptions import ContextWindowExceededError
|
from litellm.exceptions import ContextWindowExceededError, RateLimitError
|
||||||
from litellm.litellm_core_utils.get_supported_openai_params import (
|
from litellm.litellm_core_utils.get_supported_openai_params import (
|
||||||
get_supported_openai_params,
|
get_supported_openai_params,
|
||||||
)
|
)
|
||||||
@@ -669,6 +673,10 @@ class LLM(BaseLLM):
|
|||||||
)
|
)
|
||||||
return full_response
|
return full_response
|
||||||
|
|
||||||
|
except RateLimitError as e:
|
||||||
|
# Convert litellm's rate limit error to our own exception type
|
||||||
|
# for graceful quota limit handling
|
||||||
|
raise LLMQuotaLimitExceededException(str(e))
|
||||||
except ContextWindowExceededError as e:
|
except ContextWindowExceededError as e:
|
||||||
# Catch context window errors from litellm and convert them to our own exception type.
|
# Catch context window errors from litellm and convert them to our own exception type.
|
||||||
# This exception is handled by CrewAgentExecutor._invoke_loop() which can then
|
# This exception is handled by CrewAgentExecutor._invoke_loop() which can then
|
||||||
@@ -812,6 +820,10 @@ class LLM(BaseLLM):
|
|||||||
# length issues appropriately.
|
# length issues appropriately.
|
||||||
response = litellm.completion(**params)
|
response = litellm.completion(**params)
|
||||||
|
|
||||||
|
except RateLimitError as e:
|
||||||
|
# Convert litellm's rate limit error to our own exception type
|
||||||
|
# for graceful quota limit handling
|
||||||
|
raise LLMQuotaLimitExceededException(str(e))
|
||||||
except ContextWindowExceededError as e:
|
except ContextWindowExceededError as e:
|
||||||
# Convert litellm's context window error to our own exception type
|
# Convert litellm's context window error to our own exception type
|
||||||
# for consistent handling in the rest of the codebase
|
# for consistent handling in the rest of the codebase
|
||||||
|
|||||||
@@ -1 +1,4 @@
|
|||||||
"""Exceptions for crewAI."""
|
"""Exceptions for crewAI."""
|
||||||
|
|
||||||
|
from crewai.utilities.exceptions.context_window_exceeding_exception import LLMContextLengthExceededException
|
||||||
|
from crewai.utilities.exceptions.quota_limit_exception import LLMQuotaLimitExceededException
|
||||||
|
|||||||
31
src/crewai/utilities/exceptions/quota_limit_exception.py
Normal file
31
src/crewai/utilities/exceptions/quota_limit_exception.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
class LLMQuotaLimitExceededException(Exception):
    """Signal that an LLM API call failed because a quota or rate limit was hit.

    The exception message wraps the provider's error text with remediation
    guidance. The unmodified provider text stays available on
    ``original_error_message``.
    """

    # Phrases that mark an error message as a quota/rate-limit failure.
    # Matching is case-insensitive (see ``_is_quota_limit_error``).
    QUOTA_LIMIT_ERRORS = [
        "quota exceeded",
        "rate limit exceeded",
        "resource exhausted",
        "too many requests",
        "quota limit reached",
        "api quota exceeded",
        "usage limit exceeded",
        "billing quota exceeded",
        "request limit exceeded",
        "daily quota exceeded",
        "monthly quota exceeded",
    ]

    def __init__(self, error_message: str):
        """Build the exception from the provider's raw error text.

        Args:
            error_message: Error text as reported by the LLM provider.
        """
        self.original_error_message = error_message
        super().__init__(self._get_error_message(error_message))

    def __reduce__(self):
        """Rebuild copies and unpickled instances from the original text.

        The default ``BaseException.__reduce__`` re-invokes ``__init__`` with
        ``self.args``, which here holds the already formatted message, so a
        copied or unpickled instance would wrap the message a second time.
        """
        return (type(self), (self.original_error_message,), dict(vars(self)))

    def _is_quota_limit_error(self, error_message: str) -> bool:
        """Return True if ``error_message`` contains a known quota phrase."""
        # Lower-case the haystack once instead of once per phrase.
        lowered = error_message.lower()
        return any(
            phrase.lower() in lowered for phrase in self.QUOTA_LIMIT_ERRORS
        )

    def _get_error_message(self, error_message: str) -> str:
        """Return the user-facing message wrapping ``error_message``."""
        return (
            f"LLM quota limit exceeded. Original error: {error_message}\n"
            "Your API quota or rate limit has been reached. Please check your API usage, "
            "upgrade your plan, or wait for the quota to reset before retrying."
        )
|
||||||
184
tests/test_quota_limit_handling.py
Normal file
184
tests/test_quota_limit_handling.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from litellm.exceptions import RateLimitError
|
||||||
|
|
||||||
|
from crewai.llm import LLM
|
||||||
|
from crewai.agent import Agent
|
||||||
|
from crewai.task import Task
|
||||||
|
from crewai.crew import Crew
|
||||||
|
from crewai.utilities.exceptions import LLMQuotaLimitExceededException
|
||||||
|
|
||||||
|
|
||||||
|
class TestQuotaLimitHandling:
    """Test suite for quota limit handling in CrewAI."""

    MODEL = "gpt-3.5-turbo"
    MESSAGES = [{"role": "user", "content": "Hello"}]

    @classmethod
    def _rate_limit_error(cls, message):
        """Build a litellm RateLimitError with every required argument.

        litellm's exception constructors require ``llm_provider`` and
        ``model`` in addition to ``message``; passing only the message raises
        ``TypeError`` before the code under test runs.
        """
        return RateLimitError(
            message=message, llm_provider="openai", model=cls.MODEL
        )

    @classmethod
    def _make_agent(cls):
        """Return a minimal agent backed by a (to be mocked) LLM."""
        return Agent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model=cls.MODEL),
        )

    @staticmethod
    def _make_task(agent):
        """Return a minimal task; ``expected_output`` is a required field."""
        return Task(
            description="Test task",
            expected_output="Test output",
            agent=agent,
        )

    def test_llm_non_streaming_quota_limit_exception(self):
        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in non-streaming mode."""
        llm = LLM(model=self.MODEL, stream=False)

        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = self._rate_limit_error("Rate limit exceeded")

            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                llm.call(messages=self.MESSAGES)

        assert "quota limit exceeded" in str(exc_info.value).lower()
        assert "Rate limit exceeded" in str(exc_info.value)

    def test_llm_streaming_quota_limit_exception(self):
        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in streaming mode."""
        llm = LLM(model=self.MODEL, stream=True)

        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = self._rate_limit_error("API quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                llm.call(messages=self.MESSAGES)

        assert "quota limit exceeded" in str(exc_info.value).lower()
        assert "API quota exceeded" in str(exc_info.value)

    def test_agent_handles_quota_limit_gracefully(self):
        """Test that Agent handles quota limit exceptions gracefully."""
        agent = self._make_agent()

        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException("Quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException):
                agent.execute_task(
                    task=self._make_task(agent),
                    context="Test context",
                )

    def test_crew_handles_quota_limit_in_task_execution(self):
        """Test that Crew handles quota limit exceptions during task execution."""
        agent = self._make_agent()
        task = self._make_task(agent)
        crew = Crew(agents=[agent], tasks=[task])

        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException(
                "Monthly quota exceeded"
            )

            with pytest.raises(LLMQuotaLimitExceededException):
                crew.kickoff()

    def test_quota_limit_exception_error_message_format(self):
        """Test that LLMQuotaLimitExceededException formats error messages correctly."""
        original_error = "Resource exhausted: Quota exceeded for requests per day"
        exception = LLMQuotaLimitExceededException(original_error)

        error_message = str(exception)
        assert "LLM quota limit exceeded" in error_message
        assert original_error in error_message
        assert "API quota or rate limit has been reached" in error_message
        assert "upgrade your plan" in error_message

    def test_quota_limit_exception_preserves_original_error(self):
        """Test that LLMQuotaLimitExceededException preserves the original error message."""
        original_error = "429 Too Many Requests: Rate limit exceeded"
        exception = LLMQuotaLimitExceededException(original_error)

        assert exception.original_error_message == original_error

    @pytest.mark.parametrize(
        "error_message,should_match",
        [
            ("quota exceeded", True),
            ("rate limit exceeded", True),
            ("resource exhausted", True),
            ("too many requests", True),
            ("quota limit reached", True),
            ("api quota exceeded", True),
            ("usage limit exceeded", True),
            ("billing quota exceeded", True),
            ("request limit exceeded", True),
            ("daily quota exceeded", True),
            ("monthly quota exceeded", True),
            ("QUOTA EXCEEDED", True),  # Case insensitive
            ("Rate Limit Exceeded", True),  # Case insensitive
            ("some other error", False),
            ("network timeout", False),
        ],
    )
    def test_quota_limit_error_detection(self, error_message, should_match):
        """Test that quota limit error detection works for various error messages."""
        exception = LLMQuotaLimitExceededException(error_message)
        assert exception._is_quota_limit_error(error_message) == should_match

    @pytest.mark.parametrize(
        "error_message",
        [
            "Rate limit reached for requests",
            "rate_limit_error: Number of requests per minute exceeded",
            "RESOURCE_EXHAUSTED: Quota exceeded",
            "429 Too Many Requests",
        ],
    )
    def test_different_provider_quota_errors(self, error_message):
        """Test quota limit handling for different LLM providers."""
        llm = LLM(model=self.MODEL)

        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = self._rate_limit_error(error_message)

            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                llm.call(messages=self.MESSAGES)

        assert error_message in str(exc_info.value)

    def test_quota_limit_vs_context_window_exceptions(self):
        """Test that quota limit and context window exceptions are handled separately."""
        from litellm.exceptions import ContextWindowExceededError
        from crewai.utilities.exceptions import LLMContextLengthExceededException

        llm = LLM(model=self.MODEL)

        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = self._rate_limit_error("Quota exceeded")

            with pytest.raises(LLMQuotaLimitExceededException):
                llm.call(messages=self.MESSAGES)

        with patch("litellm.completion") as mock_completion:
            # Keyword arguments: this constructor also requires model/provider.
            mock_completion.side_effect = ContextWindowExceededError(
                message="Context length exceeded",
                model=self.MODEL,
                llm_provider="openai",
            )

            with pytest.raises(LLMContextLengthExceededException):
                llm.call(messages=self.MESSAGES)

    def test_quota_limit_exception_in_crew_agent_executor(self):
        """Test that CrewAgentExecutor handles quota limit exceptions properly."""
        from crewai.agents.crew_agent_executor import CrewAgentExecutor

        agent = self._make_agent()
        task = self._make_task(agent)

        # CrewAgentExecutor needs far more than ``agent`` to be constructed
        # (llm, task, prompt, tools, ...), so let the agent build a fully
        # wired executor instead of instantiating it by hand.
        # NOTE(review): relies on Agent.create_agent_executor populating
        # ``agent.agent_executor`` - confirm against the current Agent API.
        agent.create_agent_executor(task=task)
        executor = agent.agent_executor
        assert isinstance(executor, CrewAgentExecutor)

        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException(
                "Daily quota exceeded"
            )

            with pytest.raises(LLMQuotaLimitExceededException):
                # Same input keys the agent passes when it runs a task.
                executor.invoke(
                    {
                        "input": "Test input",
                        "tool_names": "",
                        "tools": "",
                        "ask_for_human_input": False,
                    }
                )
|
||||||
Reference in New Issue
Block a user