From fc7bc2ae9488562227097136c589741e2aea1127 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 17 Dec 2025 14:10:23 +0000
Subject: [PATCH] fix: re-raise OutputParserError in format_answer() for retry
 logic (fixes #4113)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fix addresses issue #4113 where format_answer() was catching all exceptions
including OutputParserError and converting them to AgentFinish, which prevented
the retry logic in _invoke_loop() from working correctly when LLMs return
malformed output.

Changes:
- Modified format_answer() to re-raise OutputParserError so it can be caught
  by the retry logic in crew_agent_executor.py and lite_agent.py
- Added safeguard in handle_max_iterations_exceeded() to catch OutputParserError
  and return AgentFinish to prevent infinite loops when forced final answer
  is malformed
- Added comprehensive tests covering the fix

The fix ensures that when an LLM returns malformed output (e.g., missing colons
after Thought/Action/Action Input), the agent will retry with an error message
instead of immediately returning the malformed text as the final answer.

Co-Authored-By: João <joao@crewai.com>
---
 .../src/crewai/utilities/agent_utils.py       |  19 +-
 .../tests/utilities/test_agent_utils.py       | 272 ++++++++++++++++++
 2 files changed, 290 insertions(+), 1 deletion(-)
 create mode 100644 lib/crewai/tests/utilities/test_agent_utils.py

diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py
index 973ad5596..4d2bf99d5 100644
--- a/lib/crewai/src/crewai/utilities/agent_utils.py
+++ b/lib/crewai/src/crewai/utilities/agent_utils.py
@@ -169,7 +169,18 @@ def handle_max_iterations_exceeded(
         )
         raise ValueError("Invalid response from LLM call - None or empty.")
 
-    formatted = format_answer(answer=answer)
+    try:
+        formatted = format_answer(answer=answer)
+    except OutputParserError:
+        printer.print(
+            content="Failed to parse forced final answer. Returning raw response.",
+            color="yellow",
+        )
+        return AgentFinish(
+            thought="Failed to parse LLM response during max iterations",
+            output=answer,
+            text=answer,
+        )
 
     # If format_answer returned an AgentAction, convert it to AgentFinish
     if isinstance(formatted, AgentFinish):
@@ -206,9 +217,15 @@ def format_answer(answer: str) -> AgentAction | AgentFinish:
 
     Returns:
         Either an AgentAction or AgentFinish
+
+    Raises:
+        OutputParserError: If parsing fails due to malformed LLM output format.
+            This allows the retry logic in _invoke_loop() to handle the error.
     """
     try:
         return parse(answer)
+    except OutputParserError:
+        raise
     except Exception:
         return AgentFinish(
             thought="Failed to parse LLM response",
diff --git a/lib/crewai/tests/utilities/test_agent_utils.py b/lib/crewai/tests/utilities/test_agent_utils.py
new file mode 100644
index 000000000..34f61af5f
--- /dev/null
+++ b/lib/crewai/tests/utilities/test_agent_utils.py
@@ -0,0 +1,272 @@
+"""Tests for agent_utils module.
+
+These tests cover the format_answer() and handle_max_iterations_exceeded() functions,
+specifically testing the fix for issue #4113 where OutputParserError was being
+swallowed instead of being re-raised for retry logic.
+"""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from crewai.agents.parser import (
+    AgentAction,
+    AgentFinish,
+    OutputParserError,
+)
+from crewai.utilities.agent_utils import (
+    format_answer,
+    handle_max_iterations_exceeded,
+    process_llm_response,
+)
+
+
+class TestFormatAnswer:
+    """Tests for the format_answer function."""
+
+    def test_format_answer_with_valid_action(self) -> None:
+        """Test that format_answer correctly parses a valid action."""
+        answer = "Thought: Let's search\nAction: search\nAction Input: query"
+        result = format_answer(answer)
+        assert isinstance(result, AgentAction)
+        assert result.tool == "search"
+        assert result.tool_input == "query"
+
+    def test_format_answer_with_valid_final_answer(self) -> None:
+        """Test that format_answer correctly parses a valid final answer."""
+        answer = "Thought: I found the answer\nFinal Answer: The result is 42"
+        result = format_answer(answer)
+        assert isinstance(result, AgentFinish)
+        assert result.output == "The result is 42"
+
+    def test_format_answer_raises_output_parser_error_for_malformed_output(
+        self,
+    ) -> None:
+        """Test that format_answer re-raises OutputParserError for malformed output.
+
+        This is the core fix for issue #4113. Previously, format_answer would catch
+        all exceptions and return AgentFinish, which broke the retry logic.
+        """
+        malformed_answer = """Thought
+The user wants to verify something.
+Action
+Video Analysis Tool
+Action Input:
+{"query": "Is there something?"}"""
+
+        with pytest.raises(OutputParserError):
+            format_answer(malformed_answer)
+
+    def test_format_answer_raises_output_parser_error_missing_action(self) -> None:
+        """Test that format_answer re-raises OutputParserError when Action is missing."""
+        answer = "Thought: Let's search\nAction Input: query"
+        with pytest.raises(OutputParserError) as exc_info:
+            format_answer(answer)
+        assert "Action:" in str(exc_info.value)
+
+    def test_format_answer_raises_output_parser_error_missing_action_input(
+        self,
+    ) -> None:
+        """Test that format_answer re-raises OutputParserError when Action Input is missing."""
+        answer = "Thought: Let's search\nAction: search"
+        with pytest.raises(OutputParserError) as exc_info:
+            format_answer(answer)
+        assert "Action Input:" in str(exc_info.value)
+
+    def test_format_answer_returns_agent_finish_for_generic_exception(self) -> None:
+        """Test that format_answer returns AgentFinish for non-OutputParserError exceptions."""
+        with patch(
+            "crewai.utilities.agent_utils.parse",
+            side_effect=ValueError("Unexpected error"),
+        ):
+            result = format_answer("some answer")
+            assert isinstance(result, AgentFinish)
+            assert result.thought == "Failed to parse LLM response"
+            assert result.output == "some answer"
+
+
+class TestProcessLlmResponse:
+    """Tests for the process_llm_response function."""
+
+    def test_process_llm_response_raises_output_parser_error(self) -> None:
+        """Test that process_llm_response propagates OutputParserError."""
+        malformed_answer = "Thought\nMissing colons\nAction\nSome Tool"
+        with pytest.raises(OutputParserError):
+            process_llm_response(malformed_answer, use_stop_words=True)
+
+    def test_process_llm_response_with_valid_action(self) -> None:
+        """Test that process_llm_response correctly processes a valid action."""
+        answer = "Thought: Let's search\nAction: search\nAction Input: query"
+        result = process_llm_response(answer, use_stop_words=True)
+        assert isinstance(result, AgentAction)
+        assert result.tool == "search"
+
+    def test_process_llm_response_with_valid_final_answer(self) -> None:
+        """Test that process_llm_response correctly processes a valid final answer."""
+        answer = "Thought: Done\nFinal Answer: The result"
+        result = process_llm_response(answer, use_stop_words=True)
+        assert isinstance(result, AgentFinish)
+        assert result.output == "The result"
+
+
+class TestHandleMaxIterationsExceeded:
+    """Tests for the handle_max_iterations_exceeded function."""
+
+    def test_handle_max_iterations_exceeded_with_valid_final_answer(self) -> None:
+        """Test that handle_max_iterations_exceeded returns AgentFinish for valid output."""
+        mock_llm = MagicMock()
+        mock_llm.call.return_value = "Thought: Done\nFinal Answer: The final result"
+        mock_printer = MagicMock()
+        mock_i18n = MagicMock()
+        mock_i18n.errors.return_value = "Please provide final answer"
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=mock_printer,
+            i18n=mock_i18n,
+            messages=[],
+            llm=mock_llm,
+            callbacks=[],
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.output == "The final result"
+
+    def test_handle_max_iterations_exceeded_with_valid_action_converts_to_finish(
+        self,
+    ) -> None:
+        """Test that handle_max_iterations_exceeded converts AgentAction to AgentFinish."""
+        mock_llm = MagicMock()
+        mock_llm.call.return_value = (
+            "Thought: Using tool\nAction: search\nAction Input: query"
+        )
+        mock_printer = MagicMock()
+        mock_i18n = MagicMock()
+        mock_i18n.errors.return_value = "Please provide final answer"
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=mock_printer,
+            i18n=mock_i18n,
+            messages=[],
+            llm=mock_llm,
+            callbacks=[],
+        )
+
+        assert isinstance(result, AgentFinish)
+
+    def test_handle_max_iterations_exceeded_catches_output_parser_error(self) -> None:
+        """Test that handle_max_iterations_exceeded catches OutputParserError and returns AgentFinish.
+
+        This prevents infinite loops when the forced final answer is malformed.
+        Without this safeguard, the OutputParserError would bubble up to _invoke_loop(),
+        which would retry, hit max iterations again, and loop forever.
+        """
+        malformed_response = """Thought
+Missing colons everywhere
+Action
+Some Tool
+Action Input:
+{"query": "test"}"""
+
+        mock_llm = MagicMock()
+        mock_llm.call.return_value = malformed_response
+        mock_printer = MagicMock()
+        mock_i18n = MagicMock()
+        mock_i18n.errors.return_value = "Please provide final answer"
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=None,
+            printer=mock_printer,
+            i18n=mock_i18n,
+            messages=[],
+            llm=mock_llm,
+            callbacks=[],
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.output == malformed_response
+        assert "Failed to parse LLM response during max iterations" in result.thought
+        mock_printer.print.assert_any_call(
+            content="Failed to parse forced final answer. Returning raw response.",
+            color="yellow",
+        )
+
+    def test_handle_max_iterations_exceeded_with_previous_formatted_answer(
+        self,
+    ) -> None:
+        """Test that handle_max_iterations_exceeded uses previous answer text."""
+        mock_llm = MagicMock()
+        mock_llm.call.return_value = "Thought: Done\nFinal Answer: New result"
+        mock_printer = MagicMock()
+        mock_i18n = MagicMock()
+        mock_i18n.errors.return_value = "Please provide final answer"
+
+        previous_answer = AgentAction(
+            thought="Previous thought",
+            tool="search",
+            tool_input="query",
+            text="Previous text",
+        )
+
+        result = handle_max_iterations_exceeded(
+            formatted_answer=previous_answer,
+            printer=mock_printer,
+            i18n=mock_i18n,
+            messages=[],
+            llm=mock_llm,
+            callbacks=[],
+        )
+
+        assert isinstance(result, AgentFinish)
+        assert result.output == "New result"
+
+    def test_handle_max_iterations_exceeded_raises_on_empty_response(self) -> None:
+        """Test that handle_max_iterations_exceeded raises ValueError for empty response."""
+        mock_llm = MagicMock()
+        mock_llm.call.return_value = ""
+        mock_printer = MagicMock()
+        mock_i18n = MagicMock()
+        mock_i18n.errors.return_value = "Please provide final answer"
+
+        with pytest.raises(ValueError, match="Invalid response from LLM call"):
+            handle_max_iterations_exceeded(
+                formatted_answer=None,
+                printer=mock_printer,
+                i18n=mock_i18n,
+                messages=[],
+                llm=mock_llm,
+                callbacks=[],
+            )
+
+
+class TestRetryLogicIntegration:
+    """Integration tests to verify the retry logic works correctly with the fix."""
+
+    def test_malformed_output_allows_retry_in_format_answer(self) -> None:
+        """Test that malformed output raises OutputParserError which can be caught for retry.
+
+        This simulates what happens in _invoke_loop() when the LLM returns malformed output.
+        The OutputParserError should be raised so the loop can catch it and retry.
+        """
+        malformed_outputs = [
+            "Thought\nMissing colon after Thought",
+            "Thought: OK\nAction\nMissing colon after Action",
+            "Thought: OK\nAction: tool\nAction Input\nMissing colon",
+            "Random text without any structure",
+        ]
+
+        for malformed in malformed_outputs:
+            with pytest.raises(OutputParserError):
+                format_answer(malformed)
+
+    def test_valid_output_does_not_raise(self) -> None:
+        """Test that valid outputs are parsed correctly without raising."""
+        valid_outputs = [
+            ("Thought: Let's search\nAction: search\nAction Input: query", AgentAction),
+            ("Thought: Done\nFinal Answer: The result", AgentFinish),
+        ]
+
+        for output, expected_type in valid_outputs:
+            result = format_answer(output)
+            assert isinstance(result, expected_type)