Compare commits


1 commit

Author: Devin AI
SHA1: b065a45dda
Date: 2026-04-20 01:50:09 +00:00

Fix #5537: Gracefully handle empty LLM response on forced final answer

OpenRouter-hosted thinking models (Claude Sonnet 4.5, Opus 4.5, Gemini
3 Pro Preview) can return an empty textual response when forced to
produce a final answer after max_iter is reached, because the turn was
spent on reasoning tokens. The prior behavior raised a raw ValueError,
crashing the entire crew execution.

handle_max_iterations_exceeded now returns a graceful AgentFinish using
the last partial text (when available) or a descriptive fallback
message. Non-string responses are coerced to strings before being
passed to format_answer to avoid downstream TypeErrors.

Co-Authored-By: João <joao@crewai.com>
2 changed files with 156 additions and 4 deletions
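For orientation before the diffs: from a caller's point of view, the change looks roughly like the sketch below. This is a minimal illustration, not code from the commit; the keyword arguments and import paths are taken from the test diff further down, and before this commit the same call raised the ValueError shown in the first hunk.

from unittest.mock import MagicMock

from crewai.agents.parser import AgentFinish
from crewai.utilities.agent_utils import handle_max_iterations_exceeded
from crewai.utilities.printer import Printer

# A "thinking" model that spent its forced final-answer turn on reasoning
# tokens and returned no text.
llm = MagicMock()
llm.call = MagicMock(return_value="")

result = handle_max_iterations_exceeded(
    formatted_answer=None,  # no partial answer to fall back on
    printer=Printer(),
    messages=[],
    llm=llm,
    callbacks=[],
    verbose=False,
)

# Before this commit: ValueError("Invalid response from LLM call - None or empty.")
# After: a graceful AgentFinish carrying the descriptive fallback message.
assert isinstance(result, AgentFinish)
assert "maximum number of" in result.output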


@@ -293,13 +293,40 @@ def handle_max_iterations_exceeded(
         callbacks=callbacks,
     )
-    if answer is None or answer == "":
+    # Some providers (notably OpenRouter serving Anthropic/Gemini "thinking"
+    # models such as Claude Sonnet 4.5, Opus 4.5 or Gemini 3 Pro) may return
+    # an empty textual response when forced to produce a final answer,
+    # because the model spent its turn on reasoning tokens. In that case we
+    # prefer to surface whatever partial work we already have rather than
+    # crashing the entire execution with a raw ValueError.
+    if answer is None or (isinstance(answer, str) and answer == ""):
         if verbose:
             printer.print(
-                content="Received None or empty response from LLM call.",
-                color="red",
+                content=(
+                    "Received None or empty response from LLM call. "
+                    "Returning best-effort final answer."
+                ),
+                color="yellow",
             )
-        raise ValueError("Invalid response from LLM call - None or empty.")
+        if (
+            formatted_answer is not None
+            and hasattr(formatted_answer, "text")
+            and formatted_answer.text
+        ):
+            fallback_text = formatted_answer.text
+        else:
+            fallback_text = (
+                "Agent stopped after reaching the maximum number of "
+                "iterations without producing a final answer."
+            )
+        return AgentFinish(
+            thought="",
+            output=fallback_text,
+            text=fallback_text,
+        )
+    if not isinstance(answer, str):
+        answer = str(answer)
     formatted = format_answer(answer=answer)
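The two lines added just before format_answer also harden the non-empty path: a provider that returns a structured payload instead of a string no longer triggers a downstream TypeError. A tiny illustration, with a hypothetical payload:

# Hypothetical provider payload; the str() coercion mirrors the hunk above.
answer = {"final": "payload"}
if not isinstance(answer, str):
    answer = str(answer)  # -> "{'final': 'payload'}", safe for format_answer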


@@ -9,6 +9,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from pydantic import BaseModel, Field
 
+from crewai.agents.parser import AgentAction, AgentFinish
 from crewai.tools.base_tool import BaseTool
 from crewai.utilities.agent_utils import (
@@ -17,9 +18,11 @@ from crewai.utilities.agent_utils import (
     _format_messages_for_summary,
     _split_messages_into_chunks,
     convert_tools_to_openai_schema,
+    handle_max_iterations_exceeded,
     parse_tool_call_args,
     summarize_messages,
 )
+from crewai.utilities.printer import Printer
 
 
 class CalculatorInput(BaseModel):
@@ -1033,3 +1036,125 @@ class TestParseToolCallArgs:
         _, error = parse_tool_call_args("{bad json}", "tool", "call_7")
         assert error is not None
         assert set(error.keys()) == {"call_id", "func_name", "result", "from_cache", "original_tool"}
+
+
+class TestHandleMaxIterationsExceeded:
+    """Tests for handle_max_iterations_exceeded.
+
+    Regression coverage for https://github.com/crewAIInc/crewAI/issues/5537:
+    when OpenRouter-hosted "thinking" models (Anthropic Claude Sonnet 4.5,
+    Opus 4.5 or Gemini 3 Pro Preview) spend their forced-final-answer turn
+    on reasoning tokens, the textual response comes back empty. The
+    executor should not crash with a raw ``ValueError``; it should return
+    a graceful ``AgentFinish`` with the best text we have.
+    """
+
+    def _make_mocks(
+        self, llm_return_value: Any
+    ) -> tuple[MagicMock, Printer, list[Any]]:
+        llm = MagicMock()
+        llm.call = MagicMock(return_value=llm_return_value)
+        printer = Printer()
+        messages: list[Any] = []
+        return llm, printer, messages
+
+    def test_empty_string_response_returns_agent_finish_with_previous_text(
+        self,
+    ) -> None:
+        """Empty content after max-iter should reuse prior formatted_answer."""
+        llm, printer, messages = self._make_mocks(llm_return_value="")
+        previous = AgentAction(
+            thought="thinking",
+            tool="my_tool",
+            tool_input="{}",
+            text="Partial reasoning I already produced.",
+            result="tool result",
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=previous,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text == "Partial reasoning I already produced."
+        assert result.output == "Partial reasoning I already produced."
+        llm.call.assert_called_once()
+
+    def test_none_response_returns_agent_finish_with_fallback_text(self) -> None:
+        """When the LLM returns None and no prior text exists, still produce
+        an AgentFinish describing the max-iterations situation."""
+        llm, printer, messages = self._make_mocks(llm_return_value=None)
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert "maximum number of" in result.text
+        assert result.text == result.output
+
+    def test_empty_response_without_previous_answer_returns_fallback(
+        self,
+    ) -> None:
+        """Matches the native-tools loop call-site which passes
+        ``formatted_answer=None`` when max_iter is hit."""
+        llm, printer, messages = self._make_mocks(llm_return_value="")
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text
+        assert "maximum number of" in result.text
+
+    def test_non_empty_response_produces_final_answer(self) -> None:
+        """Baseline: a normal string response is still parsed normally."""
+        llm, printer, messages = self._make_mocks(
+            llm_return_value="Final Answer: hello"
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert "hello" in result.text
+        llm.call.assert_called_once()
+
+    def test_non_string_response_is_coerced_to_string(self) -> None:
+        """Some providers may return non-string payloads — we should not
+        crash on a ``TypeError`` coming out of ``format_answer``."""
+        llm, printer, messages = self._make_mocks(
+            llm_return_value={"final": "payload"}
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=printer,
+            messages=messages,
+            llm=llm,
+            callbacks=[],
+            verbose=False,
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.text
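
To run only the new regression tests, filtering by class name with pytest should work without hard-coding the test file's path (a sketch, assuming pytest is installed and run from the repository root):

import pytest

# -k selects the new class by name; pytest.main returns the exit code.
raise SystemExit(pytest.main(["-q", "-k", "TestHandleMaxIterationsExceeded"]))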