From fc7bc2ae9488562227097136c589741e2aea1127 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 17 Dec 2025 14:10:23 +0000
Subject: [PATCH] fix: re-raise OutputParserError in format_answer() for retry logic (fixes #4113)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fix addresses issue #4113 where format_answer() was catching all
exceptions including OutputParserError and converting them to AgentFinish,
which prevented the retry logic in _invoke_loop() from working correctly
when LLMs return malformed output.

Changes:
- Modified format_answer() to re-raise OutputParserError so it can be
  caught by the retry logic in crew_agent_executor.py and lite_agent.py
- Added safeguard in handle_max_iterations_exceeded() to catch
  OutputParserError and return AgentFinish to prevent infinite loops
  when the forced final answer is malformed
- Added comprehensive tests covering the fix

The fix ensures that when an LLM returns malformed output (e.g., missing
colons after Thought/Action/Action Input), the agent will retry with an
error message instead of immediately returning the malformed text as the
final answer.

Co-Authored-By: João --- .../src/crewai/utilities/agent_utils.py | 19 +- .../tests/utilities/test_agent_utils.py | 272 ++++++++++++++++++ 2 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 lib/crewai/tests/utilities/test_agent_utils.py diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py index 973ad5596..4d2bf99d5 100644 --- a/lib/crewai/src/crewai/utilities/agent_utils.py +++ b/lib/crewai/src/crewai/utilities/agent_utils.py @@ -169,7 +169,18 @@ def handle_max_iterations_exceeded( ) raise ValueError("Invalid response from LLM call - None or empty.") - formatted = format_answer(answer=answer) + try: + formatted = format_answer(answer=answer) + except OutputParserError: + printer.print( + content="Failed to parse forced final answer. Returning raw response.", + color="yellow", + ) + return AgentFinish( + thought="Failed to parse LLM response during max iterations", + output=answer, + text=answer, + ) # If format_answer returned an AgentAction, convert it to AgentFinish if isinstance(formatted, AgentFinish): @@ -206,9 +217,15 @@ def format_answer(answer: str) -> AgentAction | AgentFinish: Returns: Either an AgentAction or AgentFinish + + Raises: + OutputParserError: If parsing fails due to malformed LLM output format. + This allows the retry logic in _invoke_loop() to handle the error. """ try: return parse(answer) + except OutputParserError: + raise except Exception: return AgentFinish( thought="Failed to parse LLM response", diff --git a/lib/crewai/tests/utilities/test_agent_utils.py b/lib/crewai/tests/utilities/test_agent_utils.py new file mode 100644 index 000000000..34f61af5f --- /dev/null +++ b/lib/crewai/tests/utilities/test_agent_utils.py @@ -0,0 +1,272 @@ +"""Tests for agent_utils module. 
+ +These tests cover the format_answer() and handle_max_iterations_exceeded() functions, +specifically testing the fix for issue #4113 where OutputParserError was being +swallowed instead of being re-raised for retry logic. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from crewai.agents.parser import ( + AgentAction, + AgentFinish, + OutputParserError, +) +from crewai.utilities.agent_utils import ( + format_answer, + handle_max_iterations_exceeded, + process_llm_response, +) + + +class TestFormatAnswer: + """Tests for the format_answer function.""" + + def test_format_answer_with_valid_action(self) -> None: + """Test that format_answer correctly parses a valid action.""" + answer = "Thought: Let's search\nAction: search\nAction Input: query" + result = format_answer(answer) + assert isinstance(result, AgentAction) + assert result.tool == "search" + assert result.tool_input == "query" + + def test_format_answer_with_valid_final_answer(self) -> None: + """Test that format_answer correctly parses a valid final answer.""" + answer = "Thought: I found the answer\nFinal Answer: The result is 42" + result = format_answer(answer) + assert isinstance(result, AgentFinish) + assert result.output == "The result is 42" + + def test_format_answer_raises_output_parser_error_for_malformed_output( + self, + ) -> None: + """Test that format_answer re-raises OutputParserError for malformed output. + + This is the core fix for issue #4113. Previously, format_answer would catch + all exceptions and return AgentFinish, which broke the retry logic. + """ + malformed_answer = """Thought +The user wants to verify something. 
+Action +Video Analysis Tool +Action Input: +{"query": "Is there something?"}""" + + with pytest.raises(OutputParserError): + format_answer(malformed_answer) + + def test_format_answer_raises_output_parser_error_missing_action(self) -> None: + """Test that format_answer re-raises OutputParserError when Action is missing.""" + answer = "Thought: Let's search\nAction Input: query" + with pytest.raises(OutputParserError) as exc_info: + format_answer(answer) + assert "Action:" in str(exc_info.value) + + def test_format_answer_raises_output_parser_error_missing_action_input( + self, + ) -> None: + """Test that format_answer re-raises OutputParserError when Action Input is missing.""" + answer = "Thought: Let's search\nAction: search" + with pytest.raises(OutputParserError) as exc_info: + format_answer(answer) + assert "Action Input:" in str(exc_info.value) + + def test_format_answer_returns_agent_finish_for_generic_exception(self) -> None: + """Test that format_answer returns AgentFinish for non-OutputParserError exceptions.""" + with patch( + "crewai.utilities.agent_utils.parse", + side_effect=ValueError("Unexpected error"), + ): + result = format_answer("some answer") + assert isinstance(result, AgentFinish) + assert result.thought == "Failed to parse LLM response" + assert result.output == "some answer" + + +class TestProcessLlmResponse: + """Tests for the process_llm_response function.""" + + def test_process_llm_response_raises_output_parser_error(self) -> None: + """Test that process_llm_response propagates OutputParserError.""" + malformed_answer = "Thought\nMissing colons\nAction\nSome Tool" + with pytest.raises(OutputParserError): + process_llm_response(malformed_answer, use_stop_words=True) + + def test_process_llm_response_with_valid_action(self) -> None: + """Test that process_llm_response correctly processes a valid action.""" + answer = "Thought: Let's search\nAction: search\nAction Input: query" + result = process_llm_response(answer, use_stop_words=True) 
+ assert isinstance(result, AgentAction) + assert result.tool == "search" + + def test_process_llm_response_with_valid_final_answer(self) -> None: + """Test that process_llm_response correctly processes a valid final answer.""" + answer = "Thought: Done\nFinal Answer: The result" + result = process_llm_response(answer, use_stop_words=True) + assert isinstance(result, AgentFinish) + assert result.output == "The result" + + +class TestHandleMaxIterationsExceeded: + """Tests for the handle_max_iterations_exceeded function.""" + + def test_handle_max_iterations_exceeded_with_valid_final_answer(self) -> None: + """Test that handle_max_iterations_exceeded returns AgentFinish for valid output.""" + mock_llm = MagicMock() + mock_llm.call.return_value = "Thought: Done\nFinal Answer: The final result" + mock_printer = MagicMock() + mock_i18n = MagicMock() + mock_i18n.errors.return_value = "Please provide final answer" + + result = handle_max_iterations_exceeded( + formatted_answer=None, + printer=mock_printer, + i18n=mock_i18n, + messages=[], + llm=mock_llm, + callbacks=[], + ) + + assert isinstance(result, AgentFinish) + assert result.output == "The final result" + + def test_handle_max_iterations_exceeded_with_valid_action_converts_to_finish( + self, + ) -> None: + """Test that handle_max_iterations_exceeded converts AgentAction to AgentFinish.""" + mock_llm = MagicMock() + mock_llm.call.return_value = ( + "Thought: Using tool\nAction: search\nAction Input: query" + ) + mock_printer = MagicMock() + mock_i18n = MagicMock() + mock_i18n.errors.return_value = "Please provide final answer" + + result = handle_max_iterations_exceeded( + formatted_answer=None, + printer=mock_printer, + i18n=mock_i18n, + messages=[], + llm=mock_llm, + callbacks=[], + ) + + assert isinstance(result, AgentFinish) + + def test_handle_max_iterations_exceeded_catches_output_parser_error(self) -> None: + """Test that handle_max_iterations_exceeded catches OutputParserError and returns AgentFinish. 
+ + This prevents infinite loops when the forced final answer is malformed. + Without this safeguard, the OutputParserError would bubble up to _invoke_loop(), + which would retry, hit max iterations again, and loop forever. + """ + malformed_response = """Thought +Missing colons everywhere +Action +Some Tool +Action Input: +{"query": "test"}""" + + mock_llm = MagicMock() + mock_llm.call.return_value = malformed_response + mock_printer = MagicMock() + mock_i18n = MagicMock() + mock_i18n.errors.return_value = "Please provide final answer" + + result = handle_max_iterations_exceeded( + formatted_answer=None, + printer=mock_printer, + i18n=mock_i18n, + messages=[], + llm=mock_llm, + callbacks=[], + ) + + assert isinstance(result, AgentFinish) + assert result.output == malformed_response + assert "Failed to parse LLM response during max iterations" in result.thought + mock_printer.print.assert_any_call( + content="Failed to parse forced final answer. Returning raw response.", + color="yellow", + ) + + def test_handle_max_iterations_exceeded_with_previous_formatted_answer( + self, + ) -> None: + """Test that handle_max_iterations_exceeded uses previous answer text.""" + mock_llm = MagicMock() + mock_llm.call.return_value = "Thought: Done\nFinal Answer: New result" + mock_printer = MagicMock() + mock_i18n = MagicMock() + mock_i18n.errors.return_value = "Please provide final answer" + + previous_answer = AgentAction( + thought="Previous thought", + tool="search", + tool_input="query", + text="Previous text", + ) + + result = handle_max_iterations_exceeded( + formatted_answer=previous_answer, + printer=mock_printer, + i18n=mock_i18n, + messages=[], + llm=mock_llm, + callbacks=[], + ) + + assert isinstance(result, AgentFinish) + assert result.output == "New result" + + def test_handle_max_iterations_exceeded_raises_on_empty_response(self) -> None: + """Test that handle_max_iterations_exceeded raises ValueError for empty response.""" + mock_llm = MagicMock() + 
mock_llm.call.return_value = "" + mock_printer = MagicMock() + mock_i18n = MagicMock() + mock_i18n.errors.return_value = "Please provide final answer" + + with pytest.raises(ValueError, match="Invalid response from LLM call"): + handle_max_iterations_exceeded( + formatted_answer=None, + printer=mock_printer, + i18n=mock_i18n, + messages=[], + llm=mock_llm, + callbacks=[], + ) + + +class TestRetryLogicIntegration: + """Integration tests to verify the retry logic works correctly with the fix.""" + + def test_malformed_output_allows_retry_in_format_answer(self) -> None: + """Test that malformed output raises OutputParserError which can be caught for retry. + + This simulates what happens in _invoke_loop() when the LLM returns malformed output. + The OutputParserError should be raised so the loop can catch it and retry. + """ + malformed_outputs = [ + "Thought\nMissing colon after Thought", + "Thought: OK\nAction\nMissing colon after Action", + "Thought: OK\nAction: tool\nAction Input\nMissing colon", + "Random text without any structure", + ] + + for malformed in malformed_outputs: + with pytest.raises(OutputParserError): + format_answer(malformed) + + def test_valid_output_does_not_raise(self) -> None: + """Test that valid outputs are parsed correctly without raising.""" + valid_outputs = [ + ("Thought: Let's search\nAction: search\nAction Input: query", AgentAction), + ("Thought: Done\nFinal Answer: The result", AgentFinish), + ] + + for output, expected_type in valid_outputs: + result = format_answer(output) + assert isinstance(result, expected_type)