"""Unit tests for acceptance criteria validation feature at task level."""
|
|
|
|
import pytest
|
|
from unittest.mock import MagicMock, patch, call
|
|
from typing import List, Tuple
|
|
|
|
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
|
from crewai.agents.agent_state import AgentState
|
|
from crewai.tools.agent_tools.scratchpad_tool import ScratchpadTool
|
|
from crewai.agents.parser import AgentFinish
|
|
from crewai.utilities import Printer
|
|
from crewai.llm import LLM
|
|
|
|
|
|

class TestAcceptanceCriteriaValidation:
    """Test suite for task-level acceptance criteria validation functionality."""

    def setup_method(self):
        """Set up test fixtures."""
        self.mock_llm = MagicMock(spec=LLM)
        self.mock_agent = MagicMock()
        self.mock_task = MagicMock()
        self.mock_crew = MagicMock()
        self.mock_tools_handler = MagicMock()

        # Set up agent attributes
        self.mock_agent.role = "Test Agent"
        self.mock_agent.reasoning = True
        self.mock_agent.verbose = False
        self.mock_agent.reasoning_interval = None
        self.mock_agent.adaptive_reasoning = False

        # Create executor
        self.executor = CrewAgentExecutor(
            llm=self.mock_llm,
            task=self.mock_task,
            crew=self.mock_crew,
            agent=self.mock_agent,
            prompt={},
            max_iter=10,
            tools=[],
            tools_names="",
            stop_words=[],
            tools_description="",
            tools_handler=self.mock_tools_handler,
            callbacks=[]
        )

        # Set up agent state with acceptance criteria
        self.executor.agent_state = AgentState(task_id="test-task-id")
        self.executor.agent_state.acceptance_criteria = [
            "Include all required information",
            "Format output properly",
            "Provide complete analysis"
        ]

        # Mock printer
        self.executor._printer = MagicMock(spec=Printer)

    def test_validate_acceptance_criteria_all_met(self):
        """Test validation when all acceptance criteria are met."""
        output = "Complete output with all information, properly formatted, with full analysis"

        # Configure LLM to return all criteria met
        self.mock_llm.call.return_value = '''{
            "1": "MET",
            "2": "MET",
            "3": "MET"
        }'''

        is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)

        assert is_valid is True
        assert unmet_criteria == []
        assert self.mock_llm.call.call_count == 1

    def test_validate_acceptance_criteria_some_unmet(self):
        """Test validation when some criteria are not met."""
        output = "Partial output missing formatting"

        # Configure LLM to return mixed results
        self.mock_llm.call.return_value = '''{
            "1": "MET",
            "2": "NOT MET: Missing proper formatting",
            "3": "NOT MET: Analysis incomplete"
        }'''

        is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)

        assert is_valid is False
        assert len(unmet_criteria) == 2
        assert "Format output properly" in unmet_criteria
        assert "Provide complete analysis" in unmet_criteria
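
    def test_validate_acceptance_criteria_with_no_criteria_defined(self):
        """Sketch: validation when the agent state has no acceptance criteria.

        This is an illustrative sketch rather than a confirmed contract: it
        assumes ``_validate_acceptance_criteria`` short-circuits to
        ``(True, [])`` without making a validation LLM call when the criteria
        list is empty. Adjust the expectations if the implementation differs.
        """
        self.executor.agent_state.acceptance_criteria = []

        is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(
            "Any output"
        )

        # Assumed behavior: nothing to validate means the output passes
        assert is_valid is True
        assert unmet_criteria == []
        self.mock_llm.call.assert_not_called()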

    def test_create_criteria_retry_prompt_with_scratchpad(self):
        """Test retry prompt creation when scratchpad has data."""
        # Set up scratchpad tool with data
        self.executor.scratchpad_tool = ScratchpadTool()
        self.executor.agent_state.scratchpad = {
            "research_data": {"key": "value"},
            "analysis_results": ["item1", "item2"]
        }

        # Set up task details
        self.mock_task.description = "Analyze research data and provide insights"
        self.mock_task.expected_output = "A comprehensive report with analysis and recommendations"

        unmet_criteria = ["Include specific examples", "Add recommendations"]

        prompt = self.executor._create_criteria_retry_prompt(unmet_criteria)

        # Verify prompt content with new format
        assert "VALIDATION FAILED" in prompt
        assert "YOU CANNOT PROVIDE A FINAL ANSWER YET" in prompt
        assert "ORIGINAL TASK:" in prompt
        assert "Analyze research data" in prompt
        assert "EXPECTED OUTPUT:" in prompt
        assert "comprehensive report" in prompt
        assert "Include specific examples" in prompt
        assert "Add recommendations" in prompt
        assert "Access Scratchpad Memory" in prompt
        assert "'research_data'" in prompt
        assert "'analysis_results'" in prompt
        assert "Action:" in prompt
        assert "Action Input:" in prompt
        assert "CONTINUE WITH TOOL USAGE NOW" in prompt
        assert "DO NOT ATTEMPT ANOTHER FINAL ANSWER" in prompt

    def test_create_criteria_retry_prompt_without_scratchpad(self):
        """Test retry prompt creation when no scratchpad data exists."""
        unmet_criteria = ["Add more detail"]

        prompt = self.executor._create_criteria_retry_prompt(unmet_criteria)

        assert "Add more detail" in prompt
        assert "VALIDATION FAILED" in prompt
        assert "📦 YOUR SCRATCHPAD CONTAINS DATA" not in prompt

    @patch('crewai.agents.crew_agent_executor.get_llm_response')
    @patch('crewai.agents.crew_agent_executor.process_llm_response')
    def test_invoke_loop_blocks_incomplete_final_answer(self, mock_process, mock_get_response):
        """Test that the invoke loop blocks incomplete final answers."""
        # Set up conditions
        self.executor.agent_state.acceptance_criteria = ["Complete all sections"]

        # First attempt returns an incomplete final answer
        incomplete_answer = AgentFinish(
            thought="Done",
            output="Exploring potential follow-up tasks!",
            text="Final Answer: Exploring potential follow-up tasks!"
        )

        # After retry, return a complete answer
        complete_answer = AgentFinish(
            thought="Done with all sections",
            output="Complete output with all sections addressed",
            text="Final Answer: Complete output with all sections addressed"
        )

        # Configure mocks
        mock_process.side_effect = [incomplete_answer, complete_answer]
        mock_get_response.return_value = "response"

        # Configure validation
        self.mock_llm.call.side_effect = [
            '{"1": "NOT MET: Missing required sections"}',  # First validation fails
            '{"1": "MET"}'  # Second validation passes
        ]

        # Execute
        result = self.executor._invoke_loop()

        # Verify
        assert result == complete_answer
        assert self.mock_llm.call.call_count == 2  # Two validation attempts
        assert mock_process.call_count == 2  # Two processing attempts

        # Verify error message was shown
        self._verify_validation_messages_shown()
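
    @patch('crewai.agents.crew_agent_executor.get_llm_response')
    @patch('crewai.agents.crew_agent_executor.process_llm_response')
    def test_invoke_loop_accepts_valid_final_answer_first_try(self, mock_process, mock_get_response):
        """Sketch: a final answer that already meets all criteria is returned as-is.

        Illustrative companion to the blocking test above; it assumes that a
        passing validation on the first AgentFinish means no retry prompt is
        issued, so exactly one validation LLM call and one processing call
        occur. Adjust if the loop behaves differently.
        """
        self.executor.agent_state.acceptance_criteria = ["Complete all sections"]

        complete_answer = AgentFinish(
            thought="Done with all sections",
            output="Complete output with all sections addressed",
            text="Final Answer: Complete output with all sections addressed"
        )

        mock_process.return_value = complete_answer
        mock_get_response.return_value = "response"
        self.mock_llm.call.return_value = '{"1": "MET"}'

        result = self.executor._invoke_loop()

        assert result == complete_answer
        assert self.mock_llm.call.call_count == 1  # Single passing validation
        assert mock_process.call_count == 1  # No retry needed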

    def test_validation_happens_on_every_final_answer_attempt(self):
        """Test that validation happens on every AgentFinish attempt."""
        self.executor.agent_state.acceptance_criteria = ["Complete all sections"]

        # Configure LLM to always return criteria not met
        self.mock_llm.call.return_value = '{"1": "NOT MET: Missing required sections"}'

        output = "Incomplete output"

        # Validate multiple times - each should trigger validation
        for _ in range(3):
            is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)
            assert is_valid is False
            assert len(unmet_criteria) == 1

        # Verify validation was called every time
        assert self.mock_llm.call.call_count == 3

    def _verify_validation_messages_shown(self):
        """Helper to verify validation messages were displayed."""
        print_calls = self.executor._printer.print.call_args_list

        # Check for validation message
        validation_msg_shown = any(
            "Validating acceptance criteria" in str(call)
            for call in print_calls
        )

        # Check for failure message
        failure_msg_shown = any(
            "Cannot finalize" in str(call)
            for call in print_calls
        )

        assert validation_msg_shown or failure_msg_shown