fix: re-raise OutputParserError in format_answer() for retry logic (fixes #4113)

This fix addresses issue #4113 where format_answer() was catching all exceptions
including OutputParserError and converting them to AgentFinish, which prevented
the retry logic in _invoke_loop() from working correctly when LLMs return
malformed output.

Changes:
- Modified format_answer() to re-raise OutputParserError so it can be caught
  by the retry logic in crew_agent_executor.py and lite_agent.py
- Added safeguard in handle_max_iterations_exceeded() to catch OutputParserError
  and return AgentFinish to prevent infinite loops when forced final answer
  is malformed
- Added comprehensive tests covering the fix

The fix ensures that when an LLM returns malformed output (e.g., missing colons
after Thought/Action/Action Input), the agent will retry with an error message
instead of immediately returning the malformed text as the final answer.

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2025-12-17 14:10:23 +00:00
parent 1cdbe79b34
commit fc7bc2ae94
2 changed files with 290 additions and 1 deletions

View File

@@ -169,7 +169,18 @@ def handle_max_iterations_exceeded(
)
raise ValueError("Invalid response from LLM call - None or empty.")
formatted = format_answer(answer=answer)
try:
formatted = format_answer(answer=answer)
except OutputParserError:
printer.print(
content="Failed to parse forced final answer. Returning raw response.",
color="yellow",
)
return AgentFinish(
thought="Failed to parse LLM response during max iterations",
output=answer,
text=answer,
)
# If format_answer returned an AgentAction, convert it to AgentFinish
if isinstance(formatted, AgentFinish):
@@ -206,9 +217,15 @@ def format_answer(answer: str) -> AgentAction | AgentFinish:
Returns:
Either an AgentAction or AgentFinish
Raises:
OutputParserError: If parsing fails due to malformed LLM output format.
This allows the retry logic in _invoke_loop() to handle the error.
"""
try:
return parse(answer)
except OutputParserError:
raise
except Exception:
return AgentFinish(
thought="Failed to parse LLM response",

View File

@@ -0,0 +1,272 @@
"""Tests for agent_utils module.
These tests cover the format_answer() and handle_max_iterations_exceeded() functions,
specifically testing the fix for issue #4113 where OutputParserError was being
swallowed instead of being re-raised for retry logic.
"""
from unittest.mock import MagicMock, patch
import pytest
from crewai.agents.parser import (
AgentAction,
AgentFinish,
OutputParserError,
)
from crewai.utilities.agent_utils import (
format_answer,
handle_max_iterations_exceeded,
process_llm_response,
)
class TestFormatAnswer:
"""Tests for the format_answer function."""
def test_format_answer_with_valid_action(self) -> None:
"""Test that format_answer correctly parses a valid action."""
answer = "Thought: Let's search\nAction: search\nAction Input: query"
result = format_answer(answer)
assert isinstance(result, AgentAction)
assert result.tool == "search"
assert result.tool_input == "query"
def test_format_answer_with_valid_final_answer(self) -> None:
"""Test that format_answer correctly parses a valid final answer."""
answer = "Thought: I found the answer\nFinal Answer: The result is 42"
result = format_answer(answer)
assert isinstance(result, AgentFinish)
assert result.output == "The result is 42"
def test_format_answer_raises_output_parser_error_for_malformed_output(
self,
) -> None:
"""Test that format_answer re-raises OutputParserError for malformed output.
This is the core fix for issue #4113. Previously, format_answer would catch
all exceptions and return AgentFinish, which broke the retry logic.
"""
malformed_answer = """Thought
The user wants to verify something.
Action
Video Analysis Tool
Action Input:
{"query": "Is there something?"}"""
with pytest.raises(OutputParserError):
format_answer(malformed_answer)
def test_format_answer_raises_output_parser_error_missing_action(self) -> None:
"""Test that format_answer re-raises OutputParserError when Action is missing."""
answer = "Thought: Let's search\nAction Input: query"
with pytest.raises(OutputParserError) as exc_info:
format_answer(answer)
assert "Action:" in str(exc_info.value)
def test_format_answer_raises_output_parser_error_missing_action_input(
self,
) -> None:
"""Test that format_answer re-raises OutputParserError when Action Input is missing."""
answer = "Thought: Let's search\nAction: search"
with pytest.raises(OutputParserError) as exc_info:
format_answer(answer)
assert "Action Input:" in str(exc_info.value)
def test_format_answer_returns_agent_finish_for_generic_exception(self) -> None:
"""Test that format_answer returns AgentFinish for non-OutputParserError exceptions."""
with patch(
"crewai.utilities.agent_utils.parse",
side_effect=ValueError("Unexpected error"),
):
result = format_answer("some answer")
assert isinstance(result, AgentFinish)
assert result.thought == "Failed to parse LLM response"
assert result.output == "some answer"
class TestProcessLlmResponse:
"""Tests for the process_llm_response function."""
def test_process_llm_response_raises_output_parser_error(self) -> None:
"""Test that process_llm_response propagates OutputParserError."""
malformed_answer = "Thought\nMissing colons\nAction\nSome Tool"
with pytest.raises(OutputParserError):
process_llm_response(malformed_answer, use_stop_words=True)
def test_process_llm_response_with_valid_action(self) -> None:
"""Test that process_llm_response correctly processes a valid action."""
answer = "Thought: Let's search\nAction: search\nAction Input: query"
result = process_llm_response(answer, use_stop_words=True)
assert isinstance(result, AgentAction)
assert result.tool == "search"
def test_process_llm_response_with_valid_final_answer(self) -> None:
"""Test that process_llm_response correctly processes a valid final answer."""
answer = "Thought: Done\nFinal Answer: The result"
result = process_llm_response(answer, use_stop_words=True)
assert isinstance(result, AgentFinish)
assert result.output == "The result"
class TestHandleMaxIterationsExceeded:
"""Tests for the handle_max_iterations_exceeded function."""
def test_handle_max_iterations_exceeded_with_valid_final_answer(self) -> None:
"""Test that handle_max_iterations_exceeded returns AgentFinish for valid output."""
mock_llm = MagicMock()
mock_llm.call.return_value = "Thought: Done\nFinal Answer: The final result"
mock_printer = MagicMock()
mock_i18n = MagicMock()
mock_i18n.errors.return_value = "Please provide final answer"
result = handle_max_iterations_exceeded(
formatted_answer=None,
printer=mock_printer,
i18n=mock_i18n,
messages=[],
llm=mock_llm,
callbacks=[],
)
assert isinstance(result, AgentFinish)
assert result.output == "The final result"
def test_handle_max_iterations_exceeded_with_valid_action_converts_to_finish(
self,
) -> None:
"""Test that handle_max_iterations_exceeded converts AgentAction to AgentFinish."""
mock_llm = MagicMock()
mock_llm.call.return_value = (
"Thought: Using tool\nAction: search\nAction Input: query"
)
mock_printer = MagicMock()
mock_i18n = MagicMock()
mock_i18n.errors.return_value = "Please provide final answer"
result = handle_max_iterations_exceeded(
formatted_answer=None,
printer=mock_printer,
i18n=mock_i18n,
messages=[],
llm=mock_llm,
callbacks=[],
)
assert isinstance(result, AgentFinish)
def test_handle_max_iterations_exceeded_catches_output_parser_error(self) -> None:
"""Test that handle_max_iterations_exceeded catches OutputParserError and returns AgentFinish.
This prevents infinite loops when the forced final answer is malformed.
Without this safeguard, the OutputParserError would bubble up to _invoke_loop(),
which would retry, hit max iterations again, and loop forever.
"""
malformed_response = """Thought
Missing colons everywhere
Action
Some Tool
Action Input:
{"query": "test"}"""
mock_llm = MagicMock()
mock_llm.call.return_value = malformed_response
mock_printer = MagicMock()
mock_i18n = MagicMock()
mock_i18n.errors.return_value = "Please provide final answer"
result = handle_max_iterations_exceeded(
formatted_answer=None,
printer=mock_printer,
i18n=mock_i18n,
messages=[],
llm=mock_llm,
callbacks=[],
)
assert isinstance(result, AgentFinish)
assert result.output == malformed_response
assert "Failed to parse LLM response during max iterations" in result.thought
mock_printer.print.assert_any_call(
content="Failed to parse forced final answer. Returning raw response.",
color="yellow",
)
def test_handle_max_iterations_exceeded_with_previous_formatted_answer(
self,
) -> None:
"""Test that handle_max_iterations_exceeded uses previous answer text."""
mock_llm = MagicMock()
mock_llm.call.return_value = "Thought: Done\nFinal Answer: New result"
mock_printer = MagicMock()
mock_i18n = MagicMock()
mock_i18n.errors.return_value = "Please provide final answer"
previous_answer = AgentAction(
thought="Previous thought",
tool="search",
tool_input="query",
text="Previous text",
)
result = handle_max_iterations_exceeded(
formatted_answer=previous_answer,
printer=mock_printer,
i18n=mock_i18n,
messages=[],
llm=mock_llm,
callbacks=[],
)
assert isinstance(result, AgentFinish)
assert result.output == "New result"
def test_handle_max_iterations_exceeded_raises_on_empty_response(self) -> None:
"""Test that handle_max_iterations_exceeded raises ValueError for empty response."""
mock_llm = MagicMock()
mock_llm.call.return_value = ""
mock_printer = MagicMock()
mock_i18n = MagicMock()
mock_i18n.errors.return_value = "Please provide final answer"
with pytest.raises(ValueError, match="Invalid response from LLM call"):
handle_max_iterations_exceeded(
formatted_answer=None,
printer=mock_printer,
i18n=mock_i18n,
messages=[],
llm=mock_llm,
callbacks=[],
)
class TestRetryLogicIntegration:
"""Integration tests to verify the retry logic works correctly with the fix."""
def test_malformed_output_allows_retry_in_format_answer(self) -> None:
"""Test that malformed output raises OutputParserError which can be caught for retry.
This simulates what happens in _invoke_loop() when the LLM returns malformed output.
The OutputParserError should be raised so the loop can catch it and retry.
"""
malformed_outputs = [
"Thought\nMissing colon after Thought",
"Thought: OK\nAction\nMissing colon after Action",
"Thought: OK\nAction: tool\nAction Input\nMissing colon",
"Random text without any structure",
]
for malformed in malformed_outputs:
with pytest.raises(OutputParserError):
format_answer(malformed)
def test_valid_output_does_not_raise(self) -> None:
"""Test that valid outputs are parsed correctly without raising."""
valid_outputs = [
("Thought: Let's search\nAction: search\nAction Input: query", AgentAction),
("Thought: Done\nFinal Answer: The result", AgentFinish),
]
for output, expected_type in valid_outputs:
result = format_answer(output)
assert isinstance(result, expected_type)