From c763457e8de97805c75c89988e185d03a750c5c9 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 2 Sep 2025 16:45:35 +0000
Subject: [PATCH] feat: Add graceful quota limit handling for LLM APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Create LLMQuotaLimitExceededException following CrewAI's existing pattern
- Add quota limit error handling in both streaming and non-streaming LLM calls
- Update error handling in agent execution and crew agent executor
- Add comprehensive tests for quota limit scenarios
- Fixes issue #3434: Handle RateLimitError gracefully instead of crashing

The implementation catches litellm.exceptions.RateLimitError and converts it
to a CrewAI-specific exception, allowing tasks to detect quota limits and shut
down gracefully instead of crashing with unhandled exceptions.

Co-Authored-By: João
---
 src/crewai/agent.py                         |  24 +++
 src/crewai/agents/crew_agent_executor.py    |   8 +
 src/crewai/llm.py                           |  14 +-
 src/crewai/utilities/exceptions/__init__.py |   3 +
 .../exceptions/quota_limit_exception.py     |  31 +++
 tests/test_quota_limit_handling.py          | 184 ++++++++++++++++++
 6 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 src/crewai/utilities/exceptions/quota_limit_exception.py
 create mode 100644 tests/test_quota_limit_handling.py

diff --git a/src/crewai/agent.py b/src/crewai/agent.py
index 122ddc82f..dc0a544c7 100644
--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -48,6 +48,10 @@ from crewai.events.types.memory_events import (
     MemoryRetrievalStartedEvent,
     MemoryRetrievalCompletedEvent,
 )
+from crewai.utilities.exceptions import (
+    LLMContextLengthExceededException,
+    LLMQuotaLimitExceededException,
+)
 from crewai.events.types.knowledge_events import (
     KnowledgeQueryCompletedEvent,
     KnowledgeQueryFailedEvent,
@@ -461,6 +465,26 @@ class Agent(BaseAgent):
                 ),
             )
             raise e
+        except LLMContextLengthExceededException as e:
+            crewai_event_bus.emit(
+                self,
+                event=AgentExecutionErrorEvent(
+                    agent=self,
+                    task=task,
+                    error=str(e),
+                ),
+            )
+            raise e
+        except LLMQuotaLimitExceededException as e:
+            crewai_event_bus.emit(
+                self,
+                event=AgentExecutionErrorEvent(
+                    agent=self,
+                    task=task,
+                    error=str(e),
+                ),
+            )
+            raise e
         except Exception as e:
             if e.__class__.__module__.startswith("litellm"):
                 # Do not retry on litellm errors
diff --git a/src/crewai/agents/crew_agent_executor.py b/src/crewai/agents/crew_agent_executor.py
index 5ab4a09ea..23dbb1403 100644
--- a/src/crewai/agents/crew_agent_executor.py
+++ b/src/crewai/agents/crew_agent_executor.py
@@ -35,6 +35,10 @@ from crewai.events.types.logging_events import (
     AgentLogsExecutionEvent,
 )
 from crewai.events.event_bus import crewai_event_bus
+from crewai.utilities.exceptions import (
+    LLMContextLengthExceededException,
+    LLMQuotaLimitExceededException,
+)
 
 
 class CrewAgentExecutor(CrewAgentExecutorMixin):
@@ -201,6 +205,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                     printer=self._printer,
                 )
 
+            except LLMContextLengthExceededException as e:
+                raise e
+            except LLMQuotaLimitExceededException as e:
+                raise e
             except Exception as e:
                 if e.__class__.__module__.startswith("litellm"):
                     # Do not retry on litellm errors
diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index 6e9d22edb..153aa5ff9 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -35,12 +35,16 @@ from crewai.events.types.tool_usage_events import (
     ToolUsageFinishedEvent,
     ToolUsageErrorEvent,
 )
+from crewai.utilities.exceptions import (
+    LLMContextLengthExceededException,
+    LLMQuotaLimitExceededException,
+)
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore", UserWarning)
     import litellm
     from litellm import Choices
-    from litellm.exceptions import ContextWindowExceededError
+    from litellm.exceptions import ContextWindowExceededError, RateLimitError
     from litellm.litellm_core_utils.get_supported_openai_params import (
         get_supported_openai_params,
     )
@@ -669,6 +673,10 @@ class LLM(BaseLLM):
                     )
                     return full_response
 
+        except RateLimitError as e:
+            # Convert litellm's rate limit error to our own exception type
+            # for graceful quota limit handling
+            raise LLMQuotaLimitExceededException(str(e))
         except ContextWindowExceededError as e:
             # Catch context window errors from litellm and convert them to our own exception type.
             # This exception is handled by CrewAgentExecutor._invoke_loop() which can then
@@ -812,6 +820,10 @@ class LLM(BaseLLM):
             # length issues appropriately.
             response = litellm.completion(**params)
 
+        except RateLimitError as e:
+            # Convert litellm's rate limit error to our own exception type
+            # for graceful quota limit handling
+            raise LLMQuotaLimitExceededException(str(e))
         except ContextWindowExceededError as e:
             # Convert litellm's context window error to our own exception type
             # for consistent handling in the rest of the codebase
diff --git a/src/crewai/utilities/exceptions/__init__.py b/src/crewai/utilities/exceptions/__init__.py
index ec686c100..92a967af0 100644
--- a/src/crewai/utilities/exceptions/__init__.py
+++ b/src/crewai/utilities/exceptions/__init__.py
@@ -1 +1,4 @@
 """Exceptions for crewAI."""
+
+from crewai.utilities.exceptions.context_window_exceeding_exception import LLMContextLengthExceededException
+from crewai.utilities.exceptions.quota_limit_exception import LLMQuotaLimitExceededException
diff --git a/src/crewai/utilities/exceptions/quota_limit_exception.py b/src/crewai/utilities/exceptions/quota_limit_exception.py
new file mode 100644
index 000000000..3c0dad131
--- /dev/null
+++ b/src/crewai/utilities/exceptions/quota_limit_exception.py
@@ -0,0 +1,31 @@
+class LLMQuotaLimitExceededException(Exception):
+    QUOTA_LIMIT_ERRORS = [
+        "quota exceeded",
+        "rate limit exceeded",
+        "resource exhausted",
+        "too many requests",
+        "quota limit reached",
+        "api quota exceeded",
+        "usage limit exceeded",
+        "billing quota exceeded",
+        "request limit exceeded",
+        "daily quota exceeded",
+        "monthly quota exceeded",
+    ]
+
+    def __init__(self, error_message: str):
+        self.original_error_message = error_message
+        super().__init__(self._get_error_message(error_message))
+
+    def _is_quota_limit_error(self, error_message: str) -> bool:
+        return any(
+            phrase.lower() in error_message.lower()
+            for phrase in self.QUOTA_LIMIT_ERRORS
+        )
+
+    def _get_error_message(self, error_message: str) -> str:
+        return (
+            f"LLM quota limit exceeded. Original error: {error_message}\n"
+            "Your API quota or rate limit has been reached. Please check your API usage, "
+            "upgrade your plan, or wait for the quota to reset before retrying."
+    )
diff --git a/tests/test_quota_limit_handling.py b/tests/test_quota_limit_handling.py
new file mode 100644
index 000000000..4b87c304c
--- /dev/null
+++ b/tests/test_quota_limit_handling.py
@@ -0,0 +1,184 @@
+import pytest
+from unittest.mock import patch
+from litellm.exceptions import RateLimitError
+
+from crewai.llm import LLM
+from crewai.agent import Agent
+from crewai.task import Task
+from crewai.crew import Crew
+from crewai.utilities.exceptions import LLMQuotaLimitExceededException
+
+
+class TestQuotaLimitHandling:
+    """Test suite for quota limit handling in CrewAI."""
+
+    def test_llm_non_streaming_quota_limit_exception(self):
+        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in non-streaming mode."""
+        llm = LLM(model="gpt-3.5-turbo", stream=False)
+
+        with patch("litellm.completion") as mock_completion:
+            mock_completion.side_effect = RateLimitError(message="Rate limit exceeded", llm_provider="openai", model="gpt-3.5-turbo")
+
+            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
+                llm.call(messages=[{"role": "user", "content": "Hello"}])
+
+            assert "quota limit exceeded" in str(exc_info.value).lower()
+            assert "Rate limit exceeded" in str(exc_info.value)
+
+    def test_llm_streaming_quota_limit_exception(self):
+        """Test that LLM raises LLMQuotaLimitExceededException for rate limit errors in streaming mode."""
+        llm = LLM(model="gpt-3.5-turbo", stream=True)
+
+        with patch("litellm.completion") as mock_completion:
+            mock_completion.side_effect = RateLimitError(message="API quota exceeded", llm_provider="openai", model="gpt-3.5-turbo")
+
+            with pytest.raises(LLMQuotaLimitExceededException) as exc_info:
+                llm.call(messages=[{"role": "user", "content": "Hello"}])
+
+            assert "quota limit exceeded" in str(exc_info.value).lower()
+            assert "API quota exceeded" in str(exc_info.value)
+
+    def test_agent_handles_quota_limit_gracefully(self):
+        """Test that Agent handles quota limit exceptions gracefully."""
+        agent = Agent(
+            role="Test Agent",
+            goal="Test goal",
+            backstory="Test backstory",
+            llm=LLM(model="gpt-3.5-turbo")
+        )
+
+        with patch.object(agent.llm, "call") as mock_call:
+            mock_call.side_effect = LLMQuotaLimitExceededException("Quota exceeded")
+
+            with pytest.raises(LLMQuotaLimitExceededException):
+                agent.execute_task(
+                    task=Task(description="Test task", expected_output="Test output", agent=agent),
+                    context="Test context"
+                )
+
+    def test_crew_handles_quota_limit_in_task_execution(self):
+        """Test that Crew handles quota limit exceptions during task execution."""
+        agent = Agent(
+            role="Test Agent",
+            goal="Test goal",
+            backstory="Test backstory",
+            llm=LLM(model="gpt-3.5-turbo")
+        )
+
+        task = Task(
+            description="Test task", expected_output="Test output",
+            agent=agent
+        )
+
+        crew = Crew(
+            agents=[agent],
+            tasks=[task]
+        )
+
+        with patch.object(agent.llm, "call") as mock_call:
+            mock_call.side_effect = LLMQuotaLimitExceededException("Monthly quota exceeded")
+
+            with pytest.raises(LLMQuotaLimitExceededException):
+                crew.kickoff()
+
+    def test_quota_limit_exception_error_message_format(self):
+        """Test that LLMQuotaLimitExceededException formats error messages correctly."""
+        original_error = "Resource exhausted: Quota exceeded for requests per day"
+        exception = LLMQuotaLimitExceededException(original_error)
+
+        error_message = str(exception)
+        assert "LLM quota limit exceeded" in error_message
+        assert original_error in error_message
+        assert "API quota or rate limit has been reached" in error_message
+        assert "upgrade your plan" in error_message
+
+    def test_quota_limit_exception_preserves_original_error(self):
+        """Test that LLMQuotaLimitExceededException preserves the original error message."""
error message.""" + original_error = "429 Too Many Requests: Rate limit exceeded" + exception = LLMQuotaLimitExceededException(original_error) + + assert exception.original_error_message == original_error + + @pytest.mark.parametrize("error_message,should_match", [ + ("quota exceeded", True), + ("rate limit exceeded", True), + ("resource exhausted", True), + ("too many requests", True), + ("quota limit reached", True), + ("api quota exceeded", True), + ("usage limit exceeded", True), + ("billing quota exceeded", True), + ("request limit exceeded", True), + ("daily quota exceeded", True), + ("monthly quota exceeded", True), + ("QUOTA EXCEEDED", True), # Case insensitive + ("Rate Limit Exceeded", True), # Case insensitive + ("some other error", False), + ("network timeout", False), + ]) + def test_quota_limit_error_detection(self, error_message, should_match): + """Test that quota limit error detection works for various error messages.""" + exception = LLMQuotaLimitExceededException(error_message) + assert exception._is_quota_limit_error(error_message) == should_match + + def test_different_provider_quota_errors(self): + """Test quota limit handling for different LLM providers.""" + test_cases = [ + "Rate limit reached for requests", + "rate_limit_error: Number of requests per minute exceeded", + "RESOURCE_EXHAUSTED: Quota exceeded", + "429 Too Many Requests", + ] + + llm = LLM(model="gpt-3.5-turbo") + + for error_message in test_cases: + with patch("litellm.completion") as mock_completion: + mock_completion.side_effect = RateLimitError(error_message) + + with pytest.raises(LLMQuotaLimitExceededException) as exc_info: + llm.call(messages=[{"role": "user", "content": "Hello"}]) + + assert error_message in str(exc_info.value) + + def test_quota_limit_vs_context_window_exceptions(self): + """Test that quota limit and context window exceptions are handled separately.""" + from litellm.exceptions import ContextWindowExceededError + from crewai.utilities.exceptions import LLMContextLengthExceededException + + llm = LLM(model="gpt-3.5-turbo") + + with patch("litellm.completion") as mock_completion: + mock_completion.side_effect = RateLimitError("Quota exceeded") + + with pytest.raises(LLMQuotaLimitExceededException): + llm.call(messages=[{"role": "user", "content": "Hello"}]) + + with patch("litellm.completion") as mock_completion: + mock_completion.side_effect = ContextWindowExceededError("Context length exceeded") + + with pytest.raises(LLMContextLengthExceededException): + llm.call(messages=[{"role": "user", "content": "Hello"}]) + + def test_quota_limit_exception_in_crew_agent_executor(self): + """Test that CrewAgentExecutor handles quota limit exceptions properly.""" + from crewai.agents.crew_agent_executor import CrewAgentExecutor + + agent = Agent( + role="Test Agent", + goal="Test goal", + backstory="Test backstory", + llm=LLM(model="gpt-3.5-turbo") + ) + + executor = CrewAgentExecutor(agent=agent) + + with patch.object(agent.llm, "call") as mock_call: + mock_call.side_effect = LLMQuotaLimitExceededException("Daily quota exceeded") + + with pytest.raises(LLMQuotaLimitExceededException): + executor.invoke({ + "input": "Test input", + "chat_history": [], + "agent_scratchpad": "" + })