Compare commits


1 commit

Author: Devin AI
SHA1: b065a45dda
Date: 2026-04-20 01:50:09 +00:00

Fix #5537: Gracefully handle empty LLM response on forced final answer

OpenRouter-hosted thinking models (Claude Sonnet 4.5, Opus 4.5, Gemini
3 Pro Preview) can return an empty textual response when forced to
produce a final answer after max_iter is reached, because the turn was
spent on reasoning tokens. The prior behavior raised a raw ValueError,
crashing the entire crew execution.

handle_max_iterations_exceeded now returns a graceful AgentFinish using
the last partial text (when available) or a descriptive fallback
message. Non-string responses are coerced to strings before being
passed to format_answer to avoid downstream TypeErrors.

Co-Authored-By: João <joao@crewai.com>
2 changed files with 156 additions and 4 deletions
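For orientation before the diffs: from a caller's point of view, the change looks roughly like the sketch below. This is a minimal illustration, not code from the commit; the keyword arguments and import paths are taken from the test diff further down, and before this commit the same call raised the ValueError shown in the first hunk.

from unittest.mock import MagicMock

from crewai.agents.parser import AgentFinish
from crewai.utilities.agent_utils import handle_max_iterations_exceeded
from crewai.utilities.printer import Printer

# A "thinking" model that spent its forced final-answer turn on reasoning
# tokens and returned no text.
llm = MagicMock()
llm.call = MagicMock(return_value="")

result = handle_max_iterations_exceeded(
    formatted_answer=None,  # no partial answer to fall back on
    printer=Printer(),
    messages=[],
    llm=llm,
    callbacks=[],
    verbose=False,
)

# Before this commit: ValueError("Invalid response from LLM call - None or empty.")
# After: a graceful AgentFinish carrying the descriptive fallback message.
assert isinstance(result, AgentFinish)
assert "maximum number of" in result.output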


@@ -293,13 +293,40 @@ def handle_max_iterations_exceeded(
         callbacks=callbacks,
     )
-    if answer is None or answer == "":
+    # Some providers (notably OpenRouter serving Anthropic/Gemini "thinking"
+    # models such as Claude Sonnet 4.5, Opus 4.5 or Gemini 3 Pro) may return
+    # an empty textual response when forced to produce a final answer,
+    # because the model spent its turn on reasoning tokens. In that case we
+    # prefer to surface whatever partial work we already have rather than
+    # crashing the entire execution with a raw ValueError.
+    if answer is None or (isinstance(answer, str) and answer == ""):
         if verbose:
             printer.print(
-                content="Received None or empty response from LLM call.",
-                color="red",
+                content=(
+                    "Received None or empty response from LLM call. "
+                    "Returning best-effort final answer."
+                ),
+                color="yellow",
             )
-        raise ValueError("Invalid response from LLM call - None or empty.")
+        if (
+            formatted_answer is not None
+            and hasattr(formatted_answer, "text")
+            and formatted_answer.text
+        ):
+            fallback_text = formatted_answer.text
+        else:
+            fallback_text = (
+                "Agent stopped after reaching the maximum number of "
+                "iterations without producing a final answer."
+            )
+        return AgentFinish(
+            thought="",
+            output=fallback_text,
+            text=fallback_text,
+        )
+    if not isinstance(answer, str):
+        answer = str(answer)
     formatted = format_answer(answer=answer)
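The two lines added just before format_answer also harden the non-empty path: a provider that returns a structured payload instead of a string no longer triggers a downstream TypeError. A tiny illustration, with a hypothetical payload:

# Hypothetical provider payload; the str() coercion mirrors the hunk above.
answer = {"final": "payload"}
if not isinstance(answer, str):
    answer = str(answer)  # -> "{'final': 'payload'}", safe for format_answer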


@@ -9,6 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from pydantic import BaseModel, Field
 
+from crewai.agents.parser import AgentAction, AgentFinish
 from crewai.tools.base_tool import BaseTool
 from crewai.utilities.agent_utils import (
@@ -17,9 +18,11 @@ from crewai.utilities.agent_utils import (
     _format_messages_for_summary,
     _split_messages_into_chunks,
     convert_tools_to_openai_schema,
+    handle_max_iterations_exceeded,
     parse_tool_call_args,
     summarize_messages,
 )
+from crewai.utilities.printer import Printer
 
 
 class CalculatorInput(BaseModel):
@@ -1033,3 +1036,125 @@ class TestParseToolCallArgs:
         _, error = parse_tool_call_args("{bad json}", "tool", "call_7")
         assert error is not None
         assert set(error.keys()) == {"call_id", "func_name", "result", "from_cache", "original_tool"}
+
+
+class TestHandleMaxIterationsExceeded:
+    """Tests for handle_max_iterations_exceeded.
+
+    Regression coverage for https://github.com/crewAIInc/crewAI/issues/5537:
+    when OpenRouter-hosted "thinking" models (Anthropic Claude Sonnet 4.5,
+    Opus 4.5 or Gemini 3 Pro Preview) spend their forced-final-answer turn
+    on reasoning tokens, the textual response comes back empty. The
+    executor should not crash with a raw ``ValueError``; it should return
+    a graceful ``AgentFinish`` with the best text we have.
+    """
+
+    def _make_mocks(
+        self, llm_return_value: Any
+    ) -> tuple[MagicMock, Printer, list[Any]]:
+        llm = MagicMock()
+        llm.call = MagicMock(return_value=llm_return_value)
+        printer = Printer()
+        messages: list[Any] = []
+        return llm, printer, messages
+
+    def test_empty_string_response_returns_agent_finish_with_previous_text(
+        self,
+    ) -> None:
+        """Empty content after max-iter should reuse prior formatted_answer."""
+        llm, printer, messages = self._make_mocks(llm_return_value="")
+        previous = AgentAction(
+            thought="thinking",
+            tool="my_tool",
+            tool_input="{}",
+            text="Partial reasoning I already produced.",
+            result="tool result",
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=previous,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text == "Partial reasoning I already produced."
+        assert result.output == "Partial reasoning I already produced."
+        llm.call.assert_called_once()
+
+    def test_none_response_returns_agent_finish_with_fallback_text(self) -> None:
+        """When the LLM returns None and no prior text exists, still produce
+        an AgentFinish describing the max-iterations situation."""
+        llm, printer, messages = self._make_mocks(llm_return_value=None)
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert "maximum number of" in result.text
+        assert result.text == result.output
+
+    def test_empty_response_without_previous_answer_returns_fallback(
+        self,
+    ) -> None:
+        """Matches the native-tools loop call-site which passes
+        ``formatted_answer=None`` when max_iter is hit."""
+        llm, printer, messages = self._make_mocks(llm_return_value="")
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text
+        assert "maximum number of" in result.text
+
+    def test_non_empty_response_produces_final_answer(self) -> None:
+        """Baseline: a normal string response is still parsed normally."""
+        llm, printer, messages = self._make_mocks(
+            llm_return_value="Final Answer: hello"
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert "hello" in result.text
+        llm.call.assert_called_once()
+
+    def test_non_string_response_is_coerced_to_string(self) -> None:
+        """Some providers may return non-string payloads — we should not
+        crash on a ``TypeError`` coming out of ``format_answer``."""
+        llm, printer, messages = self._make_mocks(
+            llm_return_value={"final": "payload"}
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text
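
To run only the new regression tests, filtering by class name with pytest should work without hard-coding the test file's path (a sketch, assuming pytest is installed and run from the repository root):

import pytest

# -k selects the new class by name; pytest.main returns the exit code.
raise SystemExit(pytest.main(["-q", "-k", "TestHandleMaxIterationsExceeded"]))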