From b065a45dda0bd3c6715ab572d0546e7474b2a575 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 20 Apr 2026 01:50:09 +0000
Subject: [PATCH] Fix #5537: Gracefully handle empty LLM response on forced
 final answer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OpenRouter-hosted thinking models (Claude Sonnet 4.5, Opus 4.5, Gemini
3 Pro Preview) can return an empty textual response when forced to
produce a final answer after max_iter is reached, because the turn was
spent on reasoning tokens. The prior behavior raised a raw ValueError,
crashing the entire crew execution.

handle_max_iterations_exceeded now returns an AgentFinish built from the
text of the previous formatted_answer (when non-empty) or, failing that,
a descriptive fallback message. Any other non-string response is coerced
with str() before being passed to format_answer to avoid downstream
TypeErrors.

Co-Authored-By: João <joao@crewai.com>
---
 .../src/crewai/utilities/agent_utils.py       |  35 ++++-
 .../tests/utilities/test_agent_utils.py       | 125 ++++++++++++++++++
 2 files changed, 156 insertions(+), 4 deletions(-)

diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py
index 684fd9287..ad2bd0481 100644
--- a/lib/crewai/src/crewai/utilities/agent_utils.py
+++ b/lib/crewai/src/crewai/utilities/agent_utils.py
@@ -293,13 +293,40 @@ def handle_max_iterations_exceeded(
         callbacks=callbacks,
     )
 
-    if answer is None or answer == "":
+    # Some providers (notably OpenRouter serving "thinking" models such as
+    # Claude Sonnet 4.5, Claude Opus 4.5 or Gemini 3 Pro Preview) may return
+    # an empty textual response when forced to produce a final answer,
+    # because the model spent its turn on reasoning tokens. In that case we
+    # return the previous step's text (or a fixed fallback message) as the
+    # final answer instead of raising ValueError and aborting the run.
+    if answer is None or (isinstance(answer, str) and answer == ""):
         if verbose:
             printer.print(
-                content="Received None or empty response from LLM call.",
-                color="red",
+                content=(
+                    "Received None or empty response from LLM call. "
+                    "Returning best-effort final answer."
+                ),
+                color="yellow",
             )
-        raise ValueError("Invalid response from LLM call - None or empty.")
+        if (
+            formatted_answer is not None
+            and hasattr(formatted_answer, "text")
+            and formatted_answer.text
+        ):
+            fallback_text = formatted_answer.text
+        else:
+            fallback_text = (
+                "Agent stopped after reaching the maximum number of "
+                "iterations without producing a final answer."
+            )
+        return AgentFinish(
+            thought="",
+            output=fallback_text,
+            text=fallback_text,
+        )
+
+    if not isinstance(answer, str):
+        answer = str(answer)
 
     formatted = format_answer(answer=answer)
 
diff --git a/lib/crewai/tests/utilities/test_agent_utils.py b/lib/crewai/tests/utilities/test_agent_utils.py
index 42de64fe6..ec114dbb6 100644
--- a/lib/crewai/tests/utilities/test_agent_utils.py
+++ b/lib/crewai/tests/utilities/test_agent_utils.py
@@ -9,6 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from pydantic import BaseModel, Field
 
+from crewai.agents.parser import AgentAction, AgentFinish
 from crewai.tools.base_tool import BaseTool
 from crewai.utilities.agent_utils import (
     _asummarize_chunks,
@@ -17,9 +18,11 @@ from crewai.utilities.agent_utils import (
     _format_messages_for_summary,
     _split_messages_into_chunks,
     convert_tools_to_openai_schema,
+    handle_max_iterations_exceeded,
     parse_tool_call_args,
     summarize_messages,
 )
+from crewai.utilities.printer import Printer
 
 
 class CalculatorInput(BaseModel):
@@ -1033,3 +1036,125 @@ class TestParseToolCallArgs:
         _, error = parse_tool_call_args("{bad json}", "tool", "call_7")
         assert error is not None
         assert set(error.keys()) == {"call_id", "func_name", "result", "from_cache", "original_tool"}
+
+
+class TestHandleMaxIterationsExceeded:
+    """Tests for handle_max_iterations_exceeded.
+
+    Regression coverage for https://github.com/crewAIInc/crewAI/issues/5537:
+    when OpenRouter-hosted "thinking" models (Claude Sonnet 4.5, Claude
+    Opus 4.5 or Gemini 3 Pro Preview) spend their forced-final-answer turn
+    on reasoning tokens, the textual response comes back empty. The
+    function should not raise a raw ``ValueError``; it should return an
+    ``AgentFinish`` carrying the previous step's text or a fallback message.
+    """
+
+    def _make_mocks(self, llm_return_value: Any) -> tuple[MagicMock, Printer, list[Any]]:
+        llm = MagicMock()
+        llm.call = MagicMock(return_value=llm_return_value)
+        printer = Printer()
+        messages: list[Any] = []
+        return llm, printer, messages
+
+    def test_empty_string_response_returns_agent_finish_with_previous_text(
+        self,
+    ) -> None:
+        """An empty LLM response should reuse the prior formatted_answer's text."""
+        llm, printer, messages = self._make_mocks(llm_return_value="")
+        previous = AgentAction(
+            thought="thinking",
+            tool="my_tool",
+            tool_input="{}",
+            text="Partial reasoning I already produced.",
+            result="tool result",
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=previous,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text == "Partial reasoning I already produced."
+        assert result.output == "Partial reasoning I already produced."
+        llm.call.assert_called_once()
+
+    def test_none_response_returns_agent_finish_with_fallback_text(self) -> None:
+        """When the LLM returns None and no prior text exists, still produce
+        an AgentFinish describing the max-iterations situation."""
+        llm, printer, messages = self._make_mocks(llm_return_value=None)
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert "maximum number of" in result.text
+        assert result.text == result.output
+
+    def test_empty_response_without_previous_answer_returns_fallback(
+        self,
+    ) -> None:
+        """Matches the native-tools loop call-site which passes
+        ``formatted_answer=None`` when max_iter is hit."""
+        llm, printer, messages = self._make_mocks(llm_return_value="")
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text
+        assert "maximum number of" in result.text
+
+    def test_non_empty_response_produces_final_answer(self) -> None:
+        """Baseline: a normal string response is still parsed normally."""
+        llm, printer, messages = self._make_mocks(
+            llm_return_value="Final Answer: hello"
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert "hello" in result.text
+        llm.call.assert_called_once()
+
+    def test_non_string_response_is_coerced_to_string(self) -> None:
+        """Some providers may return non-string payloads — we should not
+        crash on a ``TypeError`` coming out of ``format_answer``."""
+        llm, printer, messages = self._make_mocks(
+            llm_return_value={"final": "payload"}
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text