crewAI/tests/test_acceptance_criteria_validation.py
"""Unit tests for acceptance criteria validation feature at task level."""
import pytest
from unittest.mock import MagicMock, patch, call
from typing import List, Tuple
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.agent_state import AgentState
from crewai.tools.agent_tools.scratchpad_tool import ScratchpadTool
from crewai.agents.parser import AgentFinish
from crewai.utilities import Printer
from crewai.llm import LLM
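
# NOTE: These tests assume two private CrewAgentExecutor helpers, inferred
# from the assertions below rather than from public crewAI docs:
#   _validate_acceptance_criteria(output) -> (is_valid, unmet_criteria)
#   _create_criteria_retry_prompt(unmet_criteria) -> str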


class TestAcceptanceCriteriaValidation:
    """Test suite for task-level acceptance criteria validation functionality."""

    def setup_method(self):
        """Set up test fixtures."""
        self.mock_llm = MagicMock(spec=LLM)
        self.mock_agent = MagicMock()
        self.mock_task = MagicMock()
        self.mock_crew = MagicMock()
        self.mock_tools_handler = MagicMock()

        # Set up agent attributes
        self.mock_agent.role = "Test Agent"
        self.mock_agent.reasoning = True
        self.mock_agent.verbose = False
        self.mock_agent.reasoning_interval = None
        self.mock_agent.adaptive_reasoning = False

        # Create executor
        self.executor = CrewAgentExecutor(
            llm=self.mock_llm,
            task=self.mock_task,
            crew=self.mock_crew,
            agent=self.mock_agent,
            prompt={},
            max_iter=10,
            tools=[],
            tools_names="",
            stop_words=[],
            tools_description="",
            tools_handler=self.mock_tools_handler,
            callbacks=[],
        )

        # Set up agent state with acceptance criteria
        self.executor.agent_state = AgentState(task_id="test-task-id")
        self.executor.agent_state.acceptance_criteria = [
            "Include all required information",
            "Format output properly",
            "Provide complete analysis",
        ]

        # Mock printer
        self.executor._printer = MagicMock(spec=Printer)

    def test_validate_acceptance_criteria_all_met(self):
        """Test validation when all acceptance criteria are met."""
        output = "Complete output with all information, properly formatted, with full analysis"

        # Configure LLM to return all criteria met
        self.mock_llm.call.return_value = '''{
            "1": "MET",
            "2": "MET",
            "3": "MET"
        }'''

        is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)

        assert is_valid is True
        assert unmet_criteria == []
        assert self.mock_llm.call.call_count == 1
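
    # The grader response is assumed to be a JSON object keyed by the 1-based
    # index of each criterion, each value either "MET" or "NOT MET: <reason>".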

    def test_validate_acceptance_criteria_some_unmet(self):
        """Test validation when some criteria are not met."""
        output = "Partial output missing formatting"

        # Configure LLM to return mixed results
        self.mock_llm.call.return_value = '''{
            "1": "MET",
            "2": "NOT MET: Missing proper formatting",
            "3": "NOT MET: Analysis incomplete"
        }'''

        is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)

        assert is_valid is False
        assert len(unmet_criteria) == 2
        assert "Format output properly" in unmet_criteria
        assert "Provide complete analysis" in unmet_criteria

    def test_create_criteria_retry_prompt_with_scratchpad(self):
        """Test retry prompt creation when scratchpad has data."""
        # Set up scratchpad tool with data
        self.executor.scratchpad_tool = ScratchpadTool()
        self.executor.agent_state.scratchpad = {
            "research_data": {"key": "value"},
            "analysis_results": ["item1", "item2"],
        }

        # Set up task details
        self.mock_task.description = "Analyze research data and provide insights"
        self.mock_task.expected_output = "A comprehensive report with analysis and recommendations"

        unmet_criteria = ["Include specific examples", "Add recommendations"]
        prompt = self.executor._create_criteria_retry_prompt(unmet_criteria)

        # Verify prompt content with new format
        assert "VALIDATION FAILED" in prompt
        assert "YOU CANNOT PROVIDE A FINAL ANSWER YET" in prompt
        assert "ORIGINAL TASK:" in prompt
        assert "Analyze research data" in prompt
        assert "EXPECTED OUTPUT:" in prompt
        assert "comprehensive report" in prompt
        assert "Include specific examples" in prompt
        assert "Add recommendations" in prompt
        assert "Access Scratchpad Memory" in prompt
        assert "'research_data'" in prompt
        assert "'analysis_results'" in prompt
        assert "Action:" in prompt
        assert "Action Input:" in prompt
        assert "CONTINUE WITH TOOL USAGE NOW" in prompt
        assert "DO NOT ATTEMPT ANOTHER FINAL ANSWER" in prompt

    def test_create_criteria_retry_prompt_without_scratchpad(self):
        """Test retry prompt creation when no scratchpad data exists."""
        unmet_criteria = ["Add more detail"]
        prompt = self.executor._create_criteria_retry_prompt(unmet_criteria)

        assert "Add more detail" in prompt
        assert "VALIDATION FAILED" in prompt
        assert "📦 YOUR SCRATCHPAD CONTAINS DATA" not in prompt

    @patch('crewai.agents.crew_agent_executor.get_llm_response')
    @patch('crewai.agents.crew_agent_executor.process_llm_response')
    def test_invoke_loop_blocks_incomplete_final_answer(self, mock_process, mock_get_response):
        """Test that invoke loop blocks incomplete final answers."""
        # Set up conditions
        self.executor.agent_state.acceptance_criteria = ["Complete all sections"]

        # First attempt returns incomplete final answer
        incomplete_answer = AgentFinish(
            thought="Done",
            output="Exploring potential follow-up tasks!",
            text="Final Answer: Exploring potential follow-up tasks!",
        )
        # After retry, return complete answer
        complete_answer = AgentFinish(
            thought="Done with all sections",
            output="Complete output with all sections addressed",
            text="Final Answer: Complete output with all sections addressed",
        )

        # Configure mocks
        mock_process.side_effect = [incomplete_answer, complete_answer]
        mock_get_response.return_value = "response"

        # Configure validation
        self.mock_llm.call.side_effect = [
            '{"1": "NOT MET: Missing required sections"}',  # First validation fails
            '{"1": "MET"}',  # Second validation passes
        ]

        # Execute
        result = self.executor._invoke_loop()

        # Verify
        assert result == complete_answer
        assert self.mock_llm.call.call_count == 2  # Two validation attempts
        assert mock_process.call_count == 2  # Two processing attempts

        # Verify error message was shown
        self._verify_validation_messages_shown()
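
    # The flow exercised above: process_llm_response() yields an AgentFinish,
    # the executor grades it against the acceptance criteria before accepting
    # it, and on failure injects the retry prompt and keeps looping; only an
    # AgentFinish that passes validation is returned from _invoke_loop().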

    def test_validation_happens_on_every_final_answer_attempt(self):
        """Test that validation happens on every AgentFinish attempt."""
        self.executor.agent_state.acceptance_criteria = ["Complete all sections"]

        # Configure LLM to always return criteria not met
        self.mock_llm.call.return_value = '{"1": "NOT MET: Missing required sections"}'

        output = "Incomplete output"

        # Validate multiple times - each should trigger validation
        for _ in range(3):
            is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)
            assert is_valid is False
            assert len(unmet_criteria) == 1

        # Verify validation was called every time
        assert self.mock_llm.call.call_count == 3
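
    # Each validation attempt issues a fresh LLM grading call; verdicts are
    # not cached across attempts, so repeated failures keep being re-checked.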

    def _verify_validation_messages_shown(self):
        """Helper to verify validation messages were displayed."""
        print_calls = self.executor._printer.print.call_args_list

        # Check for validation message
        validation_msg_shown = any(
            "Validating acceptance criteria" in str(call)
            for call in print_calls
        )
        # Check for failure message
        failure_msg_shown = any(
            "Cannot finalize" in str(call)
            for call in print_calls
        )
        assert validation_msg_shown or failure_msg_shown
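

# A minimal sketch of the validation helper these tests exercise (an assumed
# shape inferred from the assertions above, not the actual crewAI
# implementation): number the criteria, ask the LLM to grade each one, parse
# the JSON verdicts, and map any "NOT MET" entry back to its criterion text.
# (Assumes `import json` and that `self.llm.call` accepts a prompt string.)
#
#     def _validate_acceptance_criteria(self, output: str) -> tuple[bool, list[str]]:
#         criteria = self.agent_state.acceptance_criteria
#         numbered = "\n".join(f"{i}. {c}" for i, c in enumerate(criteria, 1))
#         raw = self.llm.call(
#             "Grade each numbered criterion against the OUTPUT as JSON, "
#             'e.g. {"1": "MET", "2": "NOT MET: <reason>"}.\n'
#             f"CRITERIA:\n{numbered}\n\nOUTPUT:\n{output}"
#         )
#         verdicts = json.loads(raw)
#         unmet = [
#             criteria[int(idx) - 1]
#             for idx, verdict in verdicts.items()
#             if verdict.upper().startswith("NOT MET")
#         ]
#         return not unmet, unmet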