Compare commits

...

3 Commits

Author SHA1 Message Date
Devin AI
ace1f48cf2 fix: restore typing imports to resolve F821 undefined name errors
- Add back Dict, List, Tuple, Type imports that are used throughout the code
- Consolidate duplicate imports in llm.py
- Match existing codebase patterns for type annotations
- Maintain quota limit handling functionality while fixing CI issues

Co-Authored-By: João <joao@crewai.com>
2025-09-02 16:54:27 +00:00
Devin AI
9af8e64205 fix: resolve lint issues with deprecated typing imports
- Add missing type imports (Dict, List, Type, DefaultDict, Tuple) to fix F821 undefined name errors
- Consolidate typing imports to avoid duplicate imports
- Remove unused imports to fix F401 and F811 errors
- Maintain quota limit handling functionality while fixing CI lint failures

Co-Authored-By: João <joao@crewai.com>
2025-09-02 16:52:10 +00:00
Devin AI
c763457e8d feat: Add graceful quota limit handling for LLM APIs
- Create LLMQuotaLimitExceededException following CrewAI's existing pattern
- Add quota limit error handling in both streaming and non-streaming LLM calls
- Update error handling in agent execution and crew agent executor
- Add comprehensive tests for quota limit scenarios
- Fixes issue #3434: Handle RateLimitError gracefully instead of crashing

The implementation catches litellm.exceptions.RateLimitError and converts it
to a CrewAI-specific exception, allowing tasks to detect quota limits and
shut down gracefully instead of crashing with unhandled exceptions.

Co-Authored-By: João <joao@crewai.com>
2025-09-02 16:45:35 +00:00
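Before the file-by-file diff, a minimal sketch of the caller-side pattern this commit unlocks (the wrapper below is illustrative, not part of the PR): a run that hits a provider quota now surfaces a typed exception that can be caught and wound down cleanly.

    from crewai.utilities.exceptions import LLMQuotaLimitExceededException

    def run_crew_safely(crew):
        """Hypothetical wrapper: stop gracefully when an LLM quota is exhausted."""
        try:
            return crew.kickoff()
        except LLMQuotaLimitExceededException as e:
            # Previously this surfaced as an unhandled litellm RateLimitError
            # and crashed the process; now callers can persist any partial
            # state and exit cleanly instead.
            print(f"Run halted on quota limit: {e}")
            return None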
6 changed files with 263 additions and 4 deletions

View File

@@ -3,16 +3,15 @@ import subprocess
import time
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Literal,
    Optional,
    Sequence,
    Tuple,
    Type,
    Union,
)
from collections.abc import Callable, Sequence
from pydantic import Field, InstanceOf, PrivateAttr, model_validator
@@ -48,6 +47,10 @@ from crewai.events.types.memory_events import (
    MemoryRetrievalStartedEvent,
    MemoryRetrievalCompletedEvent,
)
from crewai.utilities.exceptions import (
    LLMContextLengthExceededException,
    LLMQuotaLimitExceededException,
)
from crewai.events.types.knowledge_events import (
    KnowledgeQueryCompletedEvent,
    KnowledgeQueryFailedEvent,
@@ -461,6 +464,26 @@ class Agent(BaseAgent):
                ),
            )
            raise e
        except LLMContextLengthExceededException as e:
            crewai_event_bus.emit(
                self,
                event=AgentExecutionErrorEvent(
                    agent=self,
                    task=task,
                    error=str(e),
                ),
            )
            raise e
        except LLMQuotaLimitExceededException as e:
            crewai_event_bus.emit(
                self,
                event=AgentExecutionErrorEvent(
                    agent=self,
                    task=task,
                    error=str(e),
                ),
            )
            raise e
        except Exception as e:
            if e.__class__.__module__.startswith("litellm"):
                # Do not retry on litellm errors
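The net effect of this hunk is that quota and context-window failures emit an AgentExecutionErrorEvent before propagating. A sketch of observing them, assuming the bus exposes an on(event_type) decorator and that the event class lives in crewai.events.types.agent_events (neither is shown in this diff, so verify both against the events API):

    from crewai.events.event_bus import crewai_event_bus
    from crewai.events.types.agent_events import AgentExecutionErrorEvent  # import path assumed

    @crewai_event_bus.on(AgentExecutionErrorEvent)  # registration hook assumed
    def log_agent_error(source, event):
        # Fires for quota-limit and context-length failures alike, since both
        # new branches above emit the same event type before re-raising.
        print(f"Agent '{event.agent.role}' failed: {event.error}")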

View File

@@ -1,4 +1,5 @@
from typing import Any, Callable, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Union
from collections.abc import Callable
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.agents.agent_builder.base_agent_executor_mixin import CrewAgentExecutorMixin
@@ -35,6 +36,10 @@ from crewai.events.types.logging_events import (
    AgentLogsExecutionEvent,
)
from crewai.events.event_bus import crewai_event_bus
from crewai.utilities.exceptions import (
    LLMContextLengthExceededException,
    LLMQuotaLimitExceededException,
)
class CrewAgentExecutor(CrewAgentExecutorMixin):
@@ -201,6 +206,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                printer=self._printer,
            )
        except LLMContextLengthExceededException as e:
            raise e
        except LLMQuotaLimitExceededException as e:
            raise e
        except Exception as e:
            if e.__class__.__module__.startswith("litellm"):
                # Do not retry on litellm errors

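The executor change is deliberately thin: the two typed exceptions are re-raised ahead of the broad handler so they bypass the generic branch that special-cases litellm errors. Since Python takes the first matching except clause top to bottom, clause order is what guarantees this; a stripped-down illustration with hypothetical stand-ins:

    try:
        answer = get_llm_response()  # hypothetical stand-in for the invoke loop
    except LLMQuotaLimitExceededException:
        raise  # first match wins: propagate untouched for graceful shutdown
    except Exception as e:
        # Reached only for exceptions not matched above; this is where the
        # "do not retry on litellm errors" branching lives.
        handle_or_retry(e)  # hypothetical fallback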
View File

@@ -35,12 +35,13 @@ from crewai.events.types.tool_usage_events import (
    ToolUsageFinishedEvent,
    ToolUsageErrorEvent,
)
from crewai.utilities.exceptions import LLMQuotaLimitExceededException

with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    import litellm
    from litellm import Choices
    from litellm.exceptions import ContextWindowExceededError
    from litellm.exceptions import ContextWindowExceededError, RateLimitError
    from litellm.litellm_core_utils.get_supported_openai_params import (
        get_supported_openai_params,
    )
@@ -669,6 +670,10 @@ class LLM(BaseLLM):
            )
            return full_response
        except RateLimitError as e:
            # Convert litellm's rate limit error to our own exception type
            # for graceful quota limit handling
            raise LLMQuotaLimitExceededException(str(e))
        except ContextWindowExceededError as e:
            # Catch context window errors from litellm and convert them to our own exception type.
            # This exception is handled by CrewAgentExecutor._invoke_loop() which can then
@@ -812,6 +817,10 @@ class LLM(BaseLLM):
            # length issues appropriately.
            response = litellm.completion(**params)
        except RateLimitError as e:
            # Convert litellm's rate limit error to our own exception type
            # for graceful quota limit handling
            raise LLMQuotaLimitExceededException(str(e))
        except ContextWindowExceededError as e:
            # Convert litellm's context window error to our own exception type
            # for consistent handling in the rest of the codebase
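Both call paths (streaming and non-streaming) now convert RateLimitError at the litellm boundary. The exception's own guidance is to wait for the quota to reset before retrying, so a caller that prefers retrying over shutting down could layer backoff on top of the converted exception. A sketch, not part of the PR (helper name and delays are illustrative):

    import time

    from crewai.utilities.exceptions import LLMQuotaLimitExceededException

    def call_with_backoff(llm, messages, retries=3, base_delay=30.0):
        """Hypothetical helper: retry llm.call() with exponential backoff."""
        for attempt in range(retries):
            try:
                return llm.call(messages=messages)
            except LLMQuotaLimitExceededException:
                if attempt == retries - 1:
                    raise  # out of retries: let the graceful-shutdown path take over
                time.sleep(base_delay * (2 ** attempt))  # 30s, 60s, 120s, ...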

View File

@@ -1 +1,4 @@
"""Exceptions for crewAI."""
from crewai.utilities.exceptions.context_window_exceeding_exception import LLMContextLengthExceededException
from crewai.utilities.exceptions.quota_limit_exception import LLMQuotaLimitExceededException

View File

@@ -0,0 +1,31 @@
class LLMQuotaLimitExceededException(Exception):
    QUOTA_LIMIT_ERRORS = [
        "quota exceeded",
        "rate limit exceeded",
        "resource exhausted",
        "too many requests",
        "quota limit reached",
        "api quota exceeded",
        "usage limit exceeded",
        "billing quota exceeded",
        "request limit exceeded",
        "daily quota exceeded",
        "monthly quota exceeded",
    ]

    def __init__(self, error_message: str):
        self.original_error_message = error_message
        super().__init__(self._get_error_message(error_message))

    def _is_quota_limit_error(self, error_message: str) -> bool:
        return any(
            phrase.lower() in error_message.lower()
            for phrase in self.QUOTA_LIMIT_ERRORS
        )

    def _get_error_message(self, error_message: str):
        return (
            f"LLM quota limit exceeded. Original error: {error_message}\n"
            "Your API quota or rate limit has been reached. Please check your API usage, "
            "upgrade your plan, or wait for the quota to reset before retrying."
        )
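To make the new class concrete, a quick sketch exercising only what the code above defines:

    exc = LLMQuotaLimitExceededException("429 Too Many Requests: daily quota exceeded")

    # The provider's message is kept verbatim for downstream inspection...
    assert exc.original_error_message == "429 Too Many Requests: daily quota exceeded"

    # ...while str(exc) carries the actionable guidance from _get_error_message().
    assert str(exc).startswith("LLM quota limit exceeded. Original error:")

    # Matching against QUOTA_LIMIT_ERRORS is case-insensitive on both sides.
    assert exc._is_quota_limit_error("DAILY QUOTA EXCEEDED")
    assert not exc._is_quota_limit_error("network timeout")

Note that __init__ always wraps the message unconditionally; _is_quota_limit_error() is a classification helper that the tests below exercise directly rather than something the constructor consults.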

View File

@@ -0,0 +1,184 @@
import pytest
from unittest.mock import Mock, patch

from litellm.exceptions import RateLimitError

from crewai.llm import LLM
from crewai.agent import Agent
from crewai.task import Task
from crewai.crew import Crew
from crewai.utilities.exceptions import LLMQuotaLimitExceededException


class TestQuotaLimitHandling:
    """Test suite for quota limit handling in CrewAI."""

    def test_llm_non_streaming_quota_limit_exception(self):
        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in non-streaming mode."""
        llm = LLM(model="gpt-3.5-turbo", stream=False)
        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = RateLimitError("Rate limit exceeded")
            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                llm.call(messages=[{"role": "user", "content": "Hello"}])
            assert "quota limit exceeded" in str(exc_info.value).lower()
            assert "Rate limit exceeded" in str(exc_info.value)

    def test_llm_streaming_quota_limit_exception(self):
        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in streaming mode."""
        llm = LLM(model="gpt-3.5-turbo", stream=True)
        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = RateLimitError("API quota exceeded")
            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                llm.call(messages=[{"role": "user", "content": "Hello"}])
            assert "quota limit exceeded" in str(exc_info.value).lower()
            assert "API quota exceeded" in str(exc_info.value)

    def test_agent_handles_quota_limit_gracefully(self):
        """Test that Agent handles quota limit exceptions gracefully."""
        agent = Agent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-3.5-turbo")
        )
        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException("Quota exceeded")
            with pytest.raises(LLMQuotaLimitExceededException):
                agent.execute_task(
                    task=Task(description="Test task", agent=agent),
                    context="Test context"
                )

    def test_crew_handles_quota_limit_in_task_execution(self):
        """Test that Crew handles quota limit exceptions during task execution."""
        agent = Agent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-3.5-turbo")
        )
        task = Task(
            description="Test task",
            agent=agent
        )
        crew = Crew(
            agents=[agent],
            tasks=[task]
        )
        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException("Monthly quota exceeded")
            with pytest.raises(LLMQuotaLimitExceededException):
                crew.kickoff()

    def test_quota_limit_exception_error_message_format(self):
        """Test that LLMQuotaLimitExceededException formats error messages correctly."""
        original_error = "Resource exhausted: Quota exceeded for requests per day"
        exception = LLMQuotaLimitExceededException(original_error)
        error_message = str(exception)
        assert "LLM quota limit exceeded" in error_message
        assert original_error in error_message
        assert "API quota or rate limit has been reached" in error_message
        assert "upgrade your plan" in error_message

    def test_quota_limit_exception_preserves_original_error(self):
        """Test that LLMQuotaLimitExceededException preserves the original error message."""
        original_error = "429 Too Many Requests: Rate limit exceeded"
        exception = LLMQuotaLimitExceededException(original_error)
        assert exception.original_error_message == original_error

    @pytest.mark.parametrize("error_message,should_match", [
        ("quota exceeded", True),
        ("rate limit exceeded", True),
        ("resource exhausted", True),
        ("too many requests", True),
        ("quota limit reached", True),
        ("api quota exceeded", True),
        ("usage limit exceeded", True),
        ("billing quota exceeded", True),
        ("request limit exceeded", True),
        ("daily quota exceeded", True),
        ("monthly quota exceeded", True),
        ("QUOTA EXCEEDED", True),  # Case insensitive
        ("Rate Limit Exceeded", True),  # Case insensitive
        ("some other error", False),
        ("network timeout", False),
    ])
    def test_quota_limit_error_detection(self, error_message, should_match):
        """Test that quota limit error detection works for various error messages."""
        exception = LLMQuotaLimitExceededException(error_message)
        assert exception._is_quota_limit_error(error_message) == should_match

    def test_different_provider_quota_errors(self):
        """Test quota limit handling for different LLM providers."""
        test_cases = [
            "Rate limit reached for requests",
            "rate_limit_error: Number of requests per minute exceeded",
            "RESOURCE_EXHAUSTED: Quota exceeded",
            "429 Too Many Requests",
        ]
        llm = LLM(model="gpt-3.5-turbo")
        for error_message in test_cases:
            with patch("litellm.completion") as mock_completion:
                mock_completion.side_effect = RateLimitError(error_message)
                with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
                    llm.call(messages=[{"role": "user", "content": "Hello"}])
                assert error_message in str(exc_info.value)

    def test_quota_limit_vs_context_window_exceptions(self):
        """Test that quota limit and context window exceptions are handled separately."""
        from litellm.exceptions import ContextWindowExceededError
        from crewai.utilities.exceptions import LLMContextLengthExceededException

        llm = LLM(model="gpt-3.5-turbo")
        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = RateLimitError("Quota exceeded")
            with pytest.raises(LLMQuotaLimitExceededException):
                llm.call(messages=[{"role": "user", "content": "Hello"}])
        with patch("litellm.completion") as mock_completion:
            mock_completion.side_effect = ContextWindowExceededError("Context length exceeded")
            with pytest.raises(LLMContextLengthExceededException):
                llm.call(messages=[{"role": "user", "content": "Hello"}])

    def test_quota_limit_exception_in_crew_agent_executor(self):
        """Test that CrewAgentExecutor handles quota limit exceptions properly."""
        from crewai.agents.crew_agent_executor import CrewAgentExecutor

        agent = Agent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            llm=LLM(model="gpt-3.5-turbo")
        )
        executor = CrewAgentExecutor(agent=agent)
        with patch.object(agent.llm, "call") as mock_call:
            mock_call.side_effect = LLMQuotaLimitExceededException("Daily quota exceeded")
            with pytest.raises(LLMQuotaLimitExceededException):
                executor.invoke({
                    "input": "Test input",
                    "chat_history": [],
                    "agent_scratchpad": ""
                })