Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-10 00:28:31 +00:00)
This fix addresses issue #4113, where format_answer() was catching all exceptions, including OutputParserError, and converting them to AgentFinish, which prevented the retry logic in _invoke_loop() from working correctly when LLMs return malformed output.

Changes:
- Modified format_answer() to re-raise OutputParserError so it can be caught by the retry logic in crew_agent_executor.py and lite_agent.py
- Added a safeguard in handle_max_iterations_exceeded() to catch OutputParserError and return AgentFinish, preventing infinite loops when the forced final answer is malformed
- Added comprehensive tests covering the fix

The fix ensures that when an LLM returns malformed output (e.g., missing colons after Thought/Action/Action Input), the agent retries with an error message instead of immediately returning the malformed text as the final answer.

Co-Authored-By: João <joao@crewai.com>
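To make the two changes concrete, here is a minimal sketch of the behavior the commit describes, reconstructed only from the public names exercised by the tests in this file. It is not the actual crewai/utilities/agent_utils.py source: the parse import path, the AgentFinish keyword arguments, the llm.call signature, and the prompt-building details (elided with ...) are assumptions.

# Hedged sketch only -- not the actual crewAI implementation.
from crewai.agents.parser import AgentAction, AgentFinish, OutputParserError
from crewai.agents.parser import parse  # assumed location of the shared parser


def format_answer(answer: str) -> AgentAction | AgentFinish:
    """Parse raw LLM text; re-raise parser errors so callers can retry."""
    try:
        return parse(answer)
    except OutputParserError:
        # The fix: let the error propagate to the retry logic in
        # crew_agent_executor.py / lite_agent.py instead of swallowing it.
        raise
    except Exception:
        # Non-parser failures still degrade gracefully to a final answer.
        return AgentFinish(
            thought="Failed to parse LLM response",
            output=answer,
            text=answer,
        )


def handle_max_iterations_exceeded(
    formatted_answer, printer, i18n, messages, llm, callbacks
):
    """Force a final answer once the iteration budget is exhausted."""
    ...  # append the i18n "force final answer" prompt to messages (elided, callbacks handling omitted)
    raw = llm.call(messages)
    if not raw:
        raise ValueError("Invalid response from LLM call")
    try:
        result = format_answer(raw)
    except OutputParserError:
        # Safeguard: a malformed forced answer must not bubble back up to
        # _invoke_loop(), or the loop would hit max iterations forever.
        printer.print(
            content="Failed to parse forced final answer. Returning raw response.",
            color="yellow",
        )
        return AgentFinish(
            thought="Failed to parse LLM response during max iterations",
            output=raw,
            text=raw,
        )
    if isinstance(result, AgentAction):
        # No iterations remain to run the tool, so the action's text is
        # returned as the final answer (conversion details assumed).
        return AgentFinish(thought=result.thought, output=result.text, text=result.text)
    return result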
273 lines · 10 KiB · Python
"""Tests for agent_utils module.
|
|
|
|
These tests cover the format_answer() and handle_max_iterations_exceeded() functions,
|
|
specifically testing the fix for issue #4113 where OutputParserError was being
|
|
swallowed instead of being re-raised for retry logic.
|
|
"""
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from crewai.agents.parser import (
|
|
AgentAction,
|
|
AgentFinish,
|
|
OutputParserError,
|
|
)
|
|
from crewai.utilities.agent_utils import (
|
|
format_answer,
|
|
handle_max_iterations_exceeded,
|
|
process_llm_response,
|
|
)
|
|
|
|
|
|
class TestFormatAnswer:
    """Tests for the format_answer function."""

    def test_format_answer_with_valid_action(self) -> None:
        """Test that format_answer correctly parses a valid action."""
        answer = "Thought: Let's search\nAction: search\nAction Input: query"
        result = format_answer(answer)
        assert isinstance(result, AgentAction)
        assert result.tool == "search"
        assert result.tool_input == "query"

    def test_format_answer_with_valid_final_answer(self) -> None:
        """Test that format_answer correctly parses a valid final answer."""
        answer = "Thought: I found the answer\nFinal Answer: The result is 42"
        result = format_answer(answer)
        assert isinstance(result, AgentFinish)
        assert result.output == "The result is 42"

    def test_format_answer_raises_output_parser_error_for_malformed_output(
        self,
    ) -> None:
        """Test that format_answer re-raises OutputParserError for malformed output.

        This is the core fix for issue #4113. Previously, format_answer would catch
        all exceptions and return AgentFinish, which broke the retry logic.
        """
        malformed_answer = """Thought
The user wants to verify something.
Action
Video Analysis Tool
Action Input:
{"query": "Is there something?"}"""

        with pytest.raises(OutputParserError):
            format_answer(malformed_answer)

    def test_format_answer_raises_output_parser_error_missing_action(self) -> None:
        """Test that format_answer re-raises OutputParserError when Action is missing."""
        answer = "Thought: Let's search\nAction Input: query"
        with pytest.raises(OutputParserError) as exc_info:
            format_answer(answer)
        assert "Action:" in str(exc_info.value)

    def test_format_answer_raises_output_parser_error_missing_action_input(
        self,
    ) -> None:
        """Test that format_answer re-raises OutputParserError when Action Input is missing."""
        answer = "Thought: Let's search\nAction: search"
        with pytest.raises(OutputParserError) as exc_info:
            format_answer(answer)
        assert "Action Input:" in str(exc_info.value)

    def test_format_answer_returns_agent_finish_for_generic_exception(self) -> None:
        """Test that format_answer returns AgentFinish for non-OutputParserError exceptions."""
        with patch(
            "crewai.utilities.agent_utils.parse",
            side_effect=ValueError("Unexpected error"),
        ):
            result = format_answer("some answer")
        assert isinstance(result, AgentFinish)
        assert result.thought == "Failed to parse LLM response"
        assert result.output == "some answer"


class TestProcessLlmResponse:
    """Tests for the process_llm_response function."""

    def test_process_llm_response_raises_output_parser_error(self) -> None:
        """Test that process_llm_response propagates OutputParserError."""
        malformed_answer = "Thought\nMissing colons\nAction\nSome Tool"
        with pytest.raises(OutputParserError):
            process_llm_response(malformed_answer, use_stop_words=True)

    def test_process_llm_response_with_valid_action(self) -> None:
        """Test that process_llm_response correctly processes a valid action."""
        answer = "Thought: Let's search\nAction: search\nAction Input: query"
        result = process_llm_response(answer, use_stop_words=True)
        assert isinstance(result, AgentAction)
        assert result.tool == "search"

    def test_process_llm_response_with_valid_final_answer(self) -> None:
        """Test that process_llm_response correctly processes a valid final answer."""
        answer = "Thought: Done\nFinal Answer: The result"
        result = process_llm_response(answer, use_stop_words=True)
        assert isinstance(result, AgentFinish)
        assert result.output == "The result"


class TestHandleMaxIterationsExceeded:
    """Tests for the handle_max_iterations_exceeded function."""

    def test_handle_max_iterations_exceeded_with_valid_final_answer(self) -> None:
        """Test that handle_max_iterations_exceeded returns AgentFinish for valid output."""
        mock_llm = MagicMock()
        mock_llm.call.return_value = "Thought: Done\nFinal Answer: The final result"
        mock_printer = MagicMock()
        mock_i18n = MagicMock()
        mock_i18n.errors.return_value = "Please provide final answer"

        result = handle_max_iterations_exceeded(
            formatted_answer=None,
            printer=mock_printer,
            i18n=mock_i18n,
            messages=[],
            llm=mock_llm,
            callbacks=[],
        )

        assert isinstance(result, AgentFinish)
        assert result.output == "The final result"

    def test_handle_max_iterations_exceeded_with_valid_action_converts_to_finish(
        self,
    ) -> None:
        """Test that handle_max_iterations_exceeded converts AgentAction to AgentFinish."""
        mock_llm = MagicMock()
        mock_llm.call.return_value = (
            "Thought: Using tool\nAction: search\nAction Input: query"
        )
        mock_printer = MagicMock()
        mock_i18n = MagicMock()
        mock_i18n.errors.return_value = "Please provide final answer"

        result = handle_max_iterations_exceeded(
            formatted_answer=None,
            printer=mock_printer,
            i18n=mock_i18n,
            messages=[],
            llm=mock_llm,
            callbacks=[],
        )

        assert isinstance(result, AgentFinish)

    def test_handle_max_iterations_exceeded_catches_output_parser_error(self) -> None:
        """Test that handle_max_iterations_exceeded catches OutputParserError and returns AgentFinish.

        This prevents infinite loops when the forced final answer is malformed.
        Without this safeguard, the OutputParserError would bubble up to _invoke_loop(),
        which would retry, hit max iterations again, and loop forever.
        """
        malformed_response = """Thought
Missing colons everywhere
Action
Some Tool
Action Input:
{"query": "test"}"""

        mock_llm = MagicMock()
        mock_llm.call.return_value = malformed_response
        mock_printer = MagicMock()
        mock_i18n = MagicMock()
        mock_i18n.errors.return_value = "Please provide final answer"

        result = handle_max_iterations_exceeded(
            formatted_answer=None,
            printer=mock_printer,
            i18n=mock_i18n,
            messages=[],
            llm=mock_llm,
            callbacks=[],
        )

        assert isinstance(result, AgentFinish)
        assert result.output == malformed_response
        assert "Failed to parse LLM response during max iterations" in result.thought
        mock_printer.print.assert_any_call(
            content="Failed to parse forced final answer. Returning raw response.",
            color="yellow",
        )

    def test_handle_max_iterations_exceeded_with_previous_formatted_answer(
        self,
    ) -> None:
        """Test that handle_max_iterations_exceeded uses previous answer text."""
        mock_llm = MagicMock()
        mock_llm.call.return_value = "Thought: Done\nFinal Answer: New result"
        mock_printer = MagicMock()
        mock_i18n = MagicMock()
        mock_i18n.errors.return_value = "Please provide final answer"

        previous_answer = AgentAction(
            thought="Previous thought",
            tool="search",
            tool_input="query",
            text="Previous text",
        )

        result = handle_max_iterations_exceeded(
            formatted_answer=previous_answer,
            printer=mock_printer,
            i18n=mock_i18n,
            messages=[],
            llm=mock_llm,
            callbacks=[],
        )

        assert isinstance(result, AgentFinish)
        assert result.output == "New result"

    def test_handle_max_iterations_exceeded_raises_on_empty_response(self) -> None:
        """Test that handle_max_iterations_exceeded raises ValueError for empty response."""
        mock_llm = MagicMock()
        mock_llm.call.return_value = ""
        mock_printer = MagicMock()
        mock_i18n = MagicMock()
        mock_i18n.errors.return_value = "Please provide final answer"

        with pytest.raises(ValueError, match="Invalid response from LLM call"):
            handle_max_iterations_exceeded(
                formatted_answer=None,
                printer=mock_printer,
                i18n=mock_i18n,
                messages=[],
                llm=mock_llm,
                callbacks=[],
            )


class TestRetryLogicIntegration:
    """Integration tests to verify the retry logic works correctly with the fix."""

    def test_malformed_output_allows_retry_in_format_answer(self) -> None:
        """Test that malformed output raises OutputParserError which can be caught for retry.

        This simulates what happens in _invoke_loop() when the LLM returns malformed output.
        The OutputParserError should be raised so the loop can catch it and retry.
        """
        malformed_outputs = [
            "Thought\nMissing colon after Thought",
            "Thought: OK\nAction\nMissing colon after Action",
            "Thought: OK\nAction: tool\nAction Input\nMissing colon",
            "Random text without any structure",
        ]

        for malformed in malformed_outputs:
            with pytest.raises(OutputParserError):
                format_answer(malformed)

    def test_valid_output_does_not_raise(self) -> None:
        """Test that valid outputs are parsed correctly without raising."""
        valid_outputs = [
            ("Thought: Let's search\nAction: search\nAction Input: query", AgentAction),
            ("Thought: Done\nFinal Answer: The result", AgentFinish),
        ]

        for output, expected_type in valid_outputs:
            result = format_answer(output)
            assert isinstance(result, expected_type)
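

# ---------------------------------------------------------------------------
# Hedged sketch, not the actual crewAI source: the retry pattern in
# _invoke_loop() that TestRetryLogicIntegration simulates. Because
# format_answer() now re-raises OutputParserError, a caller can feed the
# parser's complaint back to the LLM and ask again instead of accepting the
# malformed text as a final answer. The `llm_call` callable, the retry budget,
# and the feedback wiring are illustrative assumptions only.
def _illustrative_retry_loop(llm_call, max_retries: int = 3) -> AgentAction | AgentFinish:
    feedback = None
    last_error: OutputParserError | None = None
    for _ in range(max_retries):
        raw = llm_call(feedback)  # the real loop appends the feedback to `messages`
        try:
            return format_answer(raw)
        except OutputParserError as exc:
            # Retry with the parser's guidance (e.g. that "Action:" is missing).
            feedback = str(exc)
            last_error = exc
    assert last_error is not None
    raise last_error  # out of retries; surface the parser error to the caller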