From b065a45dda0bd3c6715ab572d0546e7474b2a575 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 01:50:09 +0000 Subject: [PATCH] Fix #5537: Gracefully handle empty LLM response on forced final answer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenRouter-hosted thinking models (Claude Sonnet 4.5, Opus 4.5, Gemini 3 Pro Preview) can return an empty textual response when forced to produce a final answer after max_iter is reached, because the turn was spent on reasoning tokens. The prior behavior raised a raw ValueError, crashing the entire crew execution. handle_max_iterations_exceeded now returns a graceful AgentFinish using the last partial text (when available) or a descriptive fallback message. Non-string responses are coerced to strings before being passed to format_answer to avoid downstream TypeErrors. Co-Authored-By: João --- .../src/crewai/utilities/agent_utils.py | 35 ++++- .../tests/utilities/test_agent_utils.py | 125 ++++++++++++++++++ 2 files changed, 156 insertions(+), 4 deletions(-) diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py index 684fd9287..ad2bd0481 100644 --- a/lib/crewai/src/crewai/utilities/agent_utils.py +++ b/lib/crewai/src/crewai/utilities/agent_utils.py @@ -293,13 +293,40 @@ def handle_max_iterations_exceeded( callbacks=callbacks, ) - if answer is None or answer == "": + # Some providers (notably OpenRouter serving Anthropic/Gemini "thinking" + # models such as Claude Sonnet 4.5, Opus 4.5 or Gemini 3 Pro) may return + # an empty textual response when forced to produce a final answer, + # because the model spent its turn on reasoning tokens. In that case we + # prefer to surface whatever partial work we already have rather than + # crashing the entire execution with a raw ValueError. + if answer is None or (isinstance(answer, str) and answer == ""): if verbose: printer.print( - content="Received None or empty response from LLM call.", - color="red", + content=( + "Received None or empty response from LLM call. " + "Returning best-effort final answer." + ), + color="yellow", ) - raise ValueError("Invalid response from LLM call - None or empty.") + if ( + formatted_answer is not None + and hasattr(formatted_answer, "text") + and formatted_answer.text + ): + fallback_text = formatted_answer.text + else: + fallback_text = ( + "Agent stopped after reaching the maximum number of " + "iterations without producing a final answer." + ) + return AgentFinish( + thought="", + output=fallback_text, + text=fallback_text, + ) + + if not isinstance(answer, str): + answer = str(answer) formatted = format_answer(answer=answer) diff --git a/lib/crewai/tests/utilities/test_agent_utils.py b/lib/crewai/tests/utilities/test_agent_utils.py index 42de64fe6..ec114dbb6 100644 --- a/lib/crewai/tests/utilities/test_agent_utils.py +++ b/lib/crewai/tests/utilities/test_agent_utils.py @@ -9,6 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from pydantic import BaseModel, Field +from crewai.agents.parser import AgentAction, AgentFinish from crewai.tools.base_tool import BaseTool from crewai.utilities.agent_utils import ( _asummarize_chunks, @@ -17,9 +18,11 @@ from crewai.utilities.agent_utils import ( _format_messages_for_summary, _split_messages_into_chunks, convert_tools_to_openai_schema, + handle_max_iterations_exceeded, parse_tool_call_args, summarize_messages, ) +from crewai.utilities.printer import Printer class CalculatorInput(BaseModel): @@ -1033,3 +1036,125 @@ class TestParseToolCallArgs: _, error = parse_tool_call_args("{bad json}", "tool", "call_7") assert error is not None assert set(error.keys()) == {"call_id", "func_name", "result", "from_cache", "original_tool"} + + +class TestHandleMaxIterationsExceeded: + """Tests for handle_max_iterations_exceeded. + + Regression coverage for https://github.com/crewAIInc/crewAI/issues/5537: + when OpenRouter-hosted "thinking" models (Anthropic Claude Sonnet 4.5, + Opus 4.5 or Gemini 3 Pro Preview) spend their forced-final-answer turn + on reasoning tokens, the textual response comes back empty. The + executor should not crash with a raw ``ValueError``; it should return + a graceful ``AgentFinish`` with the best text we have. + """ + + def _make_mocks(self, llm_return_value: Any) -> tuple[MagicMock, Printer, list[Any]]: + llm = MagicMock() + llm.call = MagicMock(return_value=llm_return_value) + printer = Printer() + messages: list[Any] = [] + return llm, printer, messages + + def test_empty_string_response_returns_agent_finish_with_previous_text( + self, + ) -> None: + """Empty content after max-iter should reuse prior formatted_answer.""" + llm, printer, messages = self._make_mocks(llm_return_value="") + previous = AgentAction( + thought="thinking", + tool="my_tool", + tool_input="{}", + text="Partial reasoning I already produced.", + result="tool result", + ) + + result = handle_max_iterations_exceeded( + formatted_answer=previous, + printer=printer, + messages=messages, + llm=llm, + callbacks=[], + verbose=False, + ) + + assert isinstance(result, AgentFinish) + assert result.text == "Partial reasoning I already produced." + assert result.output == "Partial reasoning I already produced." + llm.call.assert_called_once() + + def test_none_response_returns_agent_finish_with_fallback_text(self) -> None: + """When the LLM returns None and no prior text exists, still produce + an AgentFinish describing the max-iterations situation.""" + llm, printer, messages = self._make_mocks(llm_return_value=None) + + result = handle_max_iterations_exceeded( + formatted_answer=None, + printer=printer, + messages=messages, + llm=llm, + callbacks=[], + verbose=False, + ) + + assert isinstance(result, AgentFinish) + assert "maximum number of" in result.text + assert result.text == result.output + + def test_empty_response_without_previous_answer_returns_fallback( + self, + ) -> None: + """Matches the native-tools loop call-site which passes + ``formatted_answer=None`` when max_iter is hit.""" + llm, printer, messages = self._make_mocks(llm_return_value="") + + result = handle_max_iterations_exceeded( + formatted_answer=None, + printer=printer, + messages=messages, + llm=llm, + callbacks=[], + verbose=False, + ) + + assert isinstance(result, AgentFinish) + assert result.text + assert "maximum number of" in result.text + + def test_non_empty_response_produces_final_answer(self) -> None: + """Baseline: a normal string response is still parsed normally.""" + llm, printer, messages = self._make_mocks( + llm_return_value="Final Answer: hello" + ) + + result = handle_max_iterations_exceeded( + formatted_answer=None, + printer=printer, + messages=messages, + llm=llm, + callbacks=[], + verbose=False, + ) + + assert isinstance(result, AgentFinish) + assert "hello" in result.text + llm.call.assert_called_once() + + def test_non_string_response_is_coerced_to_string(self) -> None: + """Some providers may return non-string payloads — we should not + crash on a ``TypeError`` coming out of ``format_answer``.""" + llm, printer, messages = self._make_mocks( + llm_return_value={"final": "payload"} + ) + + result = handle_max_iterations_exceeded( + formatted_answer=None, + printer=printer, + messages=messages, + llm=llm, + callbacks=[], + verbose=False, + ) + + assert isinstance(result, AgentFinish) + assert result.text