From c763457e8de97805c75c89988e185d03a750c5c9 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 2 Sep 2025 16:45:35 +0000
Subject: [PATCH] feat: Add graceful quota limit handling for LLM APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Create LLMQuotaLimitExceededException following CrewAI's existing pattern
- Add quota limit error handling in both streaming and non-streaming LLM calls
- Update error handling in agent execution and crew agent executor
- Add comprehensive tests for quota limit scenarios
- Fixes issue #3434: Handle RateLimitError gracefully instead of crashing

The implementation catches litellm.exceptions.RateLimitError and converts it
to a CrewAI-specific exception, allowing tasks to detect quota limits and shut
down gracefully instead of crashing with unhandled exceptions.

Co-Authored-By: João
---
 src/crewai/agent.py                         |  24 +++
 src/crewai/agents/crew_agent_executor.py    |   8 +
 src/crewai/llm.py                           |  14 +-
 src/crewai/utilities/exceptions/__init__.py |   3 +
 .../exceptions/quota_limit_exception.py     |  31 +++
 tests/test_quota_limit_handling.py          | 184 ++++++++++++++++++
 6 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 src/crewai/utilities/exceptions/quota_limit_exception.py
 create mode 100644 tests/test_quota_limit_handling.py

diff --git a/src/crewai/agent.py b/src/crewai/agent.py
index 122ddc82f..dc0a544c7 100644
--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -48,6 +48,10 @@ from crewai.events.types.memory_events import (
     MemoryRetrievalStartedEvent,
     MemoryRetrievalCompletedEvent,
 )
+from crewai.utilities.exceptions import (
+    LLMContextLengthExceededException,
+    LLMQuotaLimitExceededException,
+)
 from crewai.events.types.knowledge_events import (
     KnowledgeQueryCompletedEvent,
     KnowledgeQueryFailedEvent,
@@ -461,6 +465,26 @@ class Agent(BaseAgent):
                 ),
             )
             raise e
+        except LLMContextLengthExceededException as e:
+            crewai_event_bus.emit(
+                self,
+                event=AgentExecutionErrorEvent(
+                    agent=self,
+                    task=task,
+                    error=str(e),
+                ),
+            )
+            raise e
+        except LLMQuotaLimitExceededException as e:
+            crewai_event_bus.emit(
+                self,
+                event=AgentExecutionErrorEvent(
+                    agent=self,
+                    task=task,
+                    error=str(e),
+                ),
+            )
+            raise e
         except Exception as e:
             if e.__class__.__module__.startswith("litellm"):
                 # Do not retry on litellm errors
diff --git a/src/crewai/agents/crew_agent_executor.py b/src/crewai/agents/crew_agent_executor.py
index 5ab4a09ea..23dbb1403 100644
--- a/src/crewai/agents/crew_agent_executor.py
+++ b/src/crewai/agents/crew_agent_executor.py
@@ -35,6 +35,10 @@ from crewai.events.types.logging_events import (
     AgentLogsExecutionEvent,
 )
 from crewai.events.event_bus import crewai_event_bus
+from crewai.utilities.exceptions import (
+    LLMContextLengthExceededException,
+    LLMQuotaLimitExceededException,
+)
 
 
 class CrewAgentExecutor(CrewAgentExecutorMixin):
@@ -201,6 +205,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                     printer=self._printer,
                 )
 
+            except LLMContextLengthExceededException as e:
+                raise e
+            except LLMQuotaLimitExceededException as e:
+                raise e
             except Exception as e:
                 if e.__class__.__module__.startswith("litellm"):
                     # Do not retry on litellm errors
diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index 6e9d22edb..153aa5ff9 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -35,12 +35,16 @@ from crewai.events.types.tool_usage_events import (
     ToolUsageFinishedEvent,
     ToolUsageErrorEvent,
 )
+from crewai.utilities.exceptions import (
+    LLMContextLengthExceededException,
+    LLMQuotaLimitExceededException,
+)
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore", UserWarning)
     import litellm
     from litellm import Choices
-    from litellm.exceptions import ContextWindowExceededError
+    from litellm.exceptions import ContextWindowExceededError, RateLimitError
     from litellm.litellm_core_utils.get_supported_openai_params import (
         get_supported_openai_params,
     )
@@ -669,6 +673,10 @@ class LLM(BaseLLM):
                     )
                     return full_response
 
+        except RateLimitError as e:
+            # Convert litellm's rate limit error to our own exception type
+            # for graceful quota limit handling
+            raise LLMQuotaLimitExceededException(str(e))
         except ContextWindowExceededError as e:
             # Catch context window errors from litellm and convert them to our own exception type.
             # This exception is handled by CrewAgentExecutor._invoke_loop() which can then
@@ -812,6 +820,10 @@ class LLM(BaseLLM):
             # length issues appropriately.
             response = litellm.completion(**params)
 
+        except RateLimitError as e:
+            # Convert litellm's rate limit error to our own exception type
+            # for graceful quota limit handling
+            raise LLMQuotaLimitExceededException(str(e))
         except ContextWindowExceededError as e:
             # Convert litellm's context window error to our own exception type
             # for consistent handling in the rest of the codebase
diff --git a/src/crewai/utilities/exceptions/__init__.py b/src/crewai/utilities/exceptions/__init__.py
index ec686c100..92a967af0 100644
--- a/src/crewai/utilities/exceptions/__init__.py
+++ b/src/crewai/utilities/exceptions/__init__.py
@@ -1 +1,4 @@
 """Exceptions for crewAI."""
+
+from crewai.utilities.exceptions.context_window_exceeding_exception import LLMContextLengthExceededException
+from crewai.utilities.exceptions.quota_limit_exception import LLMQuotaLimitExceededException
diff --git a/src/crewai/utilities/exceptions/quota_limit_exception.py b/src/crewai/utilities/exceptions/quota_limit_exception.py
new file mode 100644
index 000000000..3c0dad131
--- /dev/null
+++ b/src/crewai/utilities/exceptions/quota_limit_exception.py
@@ -0,0 +1,31 @@
+class LLMQuotaLimitExceededException(Exception):
+    QUOTA_LIMIT_ERRORS = [
+        "quota exceeded",
+        "rate limit exceeded",
+        "resource exhausted",
+        "too many requests",
+        "quota limit reached",
+        "api quota exceeded",
+        "usage limit exceeded",
+        "billing quota exceeded",
+        "request limit exceeded",
+        "daily quota exceeded",
+        "monthly quota exceeded",
+    ]
+
+    def __init__(self, error_message: str):
+        self.original_error_message = error_message
+        super().__init__(self._get_error_message(error_message))
+
+    def _is_quota_limit_error(self, error_message: str) -> bool:
+        return any(
+            phrase.lower() in error_message.lower()
+            for phrase in self.QUOTA_LIMIT_ERRORS
+        )
+
+    def _get_error_message(self, error_message: str) -> str:
+        return (
+            f"LLM quota limit exceeded. Original error: {error_message}\n"
+            "Your API quota or rate limit has been reached. Please check your API usage, "
+            "upgrade your plan, or wait for the quota to reset before retrying."
+    )
diff --git a/tests/test_quota_limit_handling.py b/tests/test_quota_limit_handling.py
new file mode 100644
index 000000000..4b87c304c
--- /dev/null
+++ b/tests/test_quota_limit_handling.py
@@ -0,0 +1,184 @@
+import pytest
+from unittest.mock import patch
+from litellm.exceptions import RateLimitError
+
+from crewai.llm import LLM
+from crewai.agent import Agent
+from crewai.task import Task
+from crewai.crew import Crew
+from crewai.utilities.exceptions import LLMQuotaLimitExceededException
+
+
+class TestQuotaLimitHandling:
+    """Test suite for quota limit handling in CrewAI."""
+
+    def test_llm_non_streaming_quota_limit_exception(self):
+        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in non-streaming mode."""
+        llm = LLM(model="gpt-3.5-turbo", stream=False)
+
+        with patch("litellm.completion") as mock_completion:
+            mock_completion.side_effect = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-3.5-turbo")
+
+            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
+                llm.call(messages=[{"role": "user", "content": "Hello"}])
+
+            assert "quota limit exceeded" in str(exc_info.value).lower()
+            assert "Rate limit exceeded" in str(exc_info.value)
+
+    def test_llm_streaming_quota_limit_exception(self):
+        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in streaming mode."""
+        llm = LLM(model="gpt-3.5-turbo", stream=True)
+
+        with patch("litellm.completion") as mock_completion:
+            mock_completion.side_effect = RateLimitError(message="API quota exceeded", llm_provider="openai", model="gpt-3.5-turbo")
+
+            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
+                llm.call(messages=[{"role": "user", "content": "Hello"}])
+
+            assert "quota limit exceeded" in str(exc_info.value).lower()
+            assert "API quota exceeded" in str(exc_info.value)
+
+    def test_agent_handles_quota_limit_gracefully(self):
+        """Test that Agent handles quota limit exceptions gracefully."""
+        agent = Agent(
+            role="Test Agent",
+            goal="Test goal",
+            backstory="Test backstory",
+            llm=LLM(model="gpt-3.5-turbo")
+        )
+
+        with patch.object(agent.llm, "call") as mock_call:
+            mock_call.side_effect = LLMQuotaLimitExceededException("Quota exceeded")
+
+            with pytest.raises(LLMQuotaLimitExceededException):
+                agent.execute_task(
+                    task=Task(description="Test task", expected_output="Test output", agent=agent),
+                    context="Test context"
+                )
+
+    def test_crew_handles_quota_limit_in_task_execution(self):
+        """Test that Crew handles quota limit exceptions during task execution."""
+        agent = Agent(
+            role="Test Agent",
+            goal="Test goal",
+            backstory="Test backstory",
+            llm=LLM(model="gpt-3.5-turbo")
+        )
+
+        task = Task(
+            description="Test task", expected_output="Test output",
+            agent=agent
+        )
+
+        crew = Crew(
+            agents=[agent],
+            tasks=[task]
+        )
+
+        with patch.object(agent.llm, "call") as mock_call:
+            mock_call.side_effect = LLMQuotaLimitExceededException("Monthly quota exceeded")
+
+            with pytest.raises(LLMQuotaLimitExceededException):
+                crew.kickoff()
+
+    def test_quota_limit_exception_error_message_format(self):
+        """Test that LLMQuotaLimitExceededException formats error messages correctly."""
+        original_error = "Resource exhausted: Quota exceeded for requests per day"
+        exception = LLMQuotaLimitExceededException(original_error)
+
+        error_message = str(exception)
+        assert "LLM quota limit exceeded" in error_message
+        assert original_error in error_message
+        assert "API quota or rate limit has been reached" in error_message
+        assert "upgrade your plan" in error_message
+
+    def test_quota_limit_exception_preserves_original_error(self):
+        """Test that LLMQuotaLimitExceededException preserves the original error message."""
error message.""" + original_error = "429 Too Many Requests: Rate limit exceeded" + exception = LLMQuotaLimitExceededException(original_error) + + assert exception.original_error_message == original_error + + @pytest.mark.parametrize("error_message,should_match", [ + ("quota exceeded", True), + ("rate limit exceeded", True), + ("resource exhausted", True), + ("too many requests", True), + ("quota limit reached", True), + ("api quota exceeded", True), + ("usage limit exceeded", True), + ("billing quota exceeded", True), + ("request limit exceeded", True), + ("daily quota exceeded", True), + ("monthly quota exceeded", True), + ("QUOTA EXCEEDED", True), # Case insensitive + ("Rate Limit Exceeded", True), # Case insensitive + ("some other error", False), + ("network timeout", False), + ]) + def test_quota_limit_error_detection(self, error_message, should_match): + """Test that quota limit error detection works for various error messages.""" + exception = LLMQuotaLimitExceededException(error_message) + assert exception._is_quota_limit_error(error_message) == should_match + + def test_different_provider_quota_errors(self): + """Test quota limit handling for different LLM providers.""" + test_cases = [ + "Rate limit reached for requests", + "rate_limit_error: Number of requests per minute exceeded", + "RESOURCE_EXHAUSTED: Quota exceeded", + "429 Too Many Requests", + ] + + llm = LLM(model="gpt-3.5-turbo") + + for error_message in test_cases: + with patch("litellm.completion") as mock_completion: + mock_completion.side_effect = RateLimitError(error_message) + + with pytest.raises(LLMQuotaLimitExceededException) as exc_info: + llm.call(messages=[{"role": "user", "content": "Hello"}]) + + assert error_message in str(exc_info.value) + + def test_quota_limit_vs_context_window_exceptions(self): + """Test that quota limit and context window exceptions are handled separately.""" + from litellm.exceptions import ContextWindowExceededError + from crewai.utilities.exceptions import LLMContextLengthExceededException + + llm = LLM(model="gpt-3.5-turbo") + + with patch("litellm.completion") as mock_completion: + mock_completion.side_effect = RateLimitError("Quota exceeded") + + with pytest.raises(LLMQuotaLimitExceededException): + llm.call(messages=[{"role": "user", "content": "Hello"}]) + + with patch("litellm.completion") as mock_completion: + mock_completion.side_effect = ContextWindowExceededError("Context length exceeded") + + with pytest.raises(LLMContextLengthExceededException): + llm.call(messages=[{"role": "user", "content": "Hello"}]) + + def test_quota_limit_exception_in_crew_agent_executor(self): + """Test that CrewAgentExecutor handles quota limit exceptions properly.""" + from crewai.agents.crew_agent_executor import CrewAgentExecutor + + agent = Agent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-3.5-turbo") + ) + + executor = CrewAgentExecutor(agent=agent) + + with patch.object(agent.llm, "call") as mock_call: + mock_call.side_effect = LLMQuotaLimitExceededException("Daily quota exceeded") + + with pytest.raises(LLMQuotaLimitExceededException): + executor.invoke({ + "input": "Test input", + "chat_history": [], + "agent_scratchpad": "" + })