mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-11 00:58:30 +00:00
Commit: improved
tests/test_acceptance_criteria_validation.py (new file, 215 lines)
@@ -0,0 +1,215 @@
"""Unit tests for acceptance criteria validation feature at task level."""

import pytest
from unittest.mock import MagicMock, patch, call
from typing import List, Tuple

from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.agent_state import AgentState
from crewai.tools.agent_tools.scratchpad_tool import ScratchpadTool
from crewai.agents.parser import AgentFinish
from crewai.utilities import Printer
from crewai.llm import LLM


class TestAcceptanceCriteriaValidation:
    """Test suite for task-level acceptance criteria validation functionality."""

    def setup_method(self):
        """Set up test fixtures."""
        self.mock_llm = MagicMock(spec=LLM)
        self.mock_agent = MagicMock()
        self.mock_task = MagicMock()
        self.mock_crew = MagicMock()
        self.mock_tools_handler = MagicMock()

        # Set up agent attributes
        self.mock_agent.role = "Test Agent"
        self.mock_agent.reasoning = True
        self.mock_agent.verbose = False
        self.mock_agent.reasoning_interval = None
        self.mock_agent.adaptive_reasoning = False

        # Create executor
        self.executor = CrewAgentExecutor(
            llm=self.mock_llm,
            task=self.mock_task,
            crew=self.mock_crew,
            agent=self.mock_agent,
            prompt={},
            max_iter=10,
            tools=[],
            tools_names="",
            stop_words=[],
            tools_description="",
            tools_handler=self.mock_tools_handler,
            callbacks=[]
        )

        # Set up agent state with acceptance criteria
        self.executor.agent_state = AgentState(task_id="test-task-id")
        self.executor.agent_state.acceptance_criteria = [
            "Include all required information",
            "Format output properly",
            "Provide complete analysis"
        ]

        # Mock printer
        self.executor._printer = MagicMock(spec=Printer)

    def test_validate_acceptance_criteria_all_met(self):
        """Test validation when all acceptance criteria are met."""
        output = "Complete output with all information, properly formatted, with full analysis"

        # Configure LLM to return all criteria met
        self.mock_llm.call.return_value = '''{
            "1": "MET",
            "2": "MET",
            "3": "MET"
        }'''

        is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)

        assert is_valid is True
        assert unmet_criteria == []
        assert self.mock_llm.call.call_count == 1

    def test_validate_acceptance_criteria_some_unmet(self):
        """Test validation when some criteria are not met."""
        output = "Partial output missing formatting"

        # Configure LLM to return mixed results
        self.mock_llm.call.return_value = '''{
            "1": "MET",
            "2": "NOT MET: Missing proper formatting",
            "3": "NOT MET: Analysis incomplete"
        }'''

        is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)

        assert is_valid is False
        assert len(unmet_criteria) == 2
        assert "Format output properly" in unmet_criteria
        assert "Provide complete analysis" in unmet_criteria

    def test_create_criteria_retry_prompt_with_scratchpad(self):
        """Test retry prompt creation when scratchpad has data."""
        # Set up scratchpad tool with data
        self.executor.scratchpad_tool = ScratchpadTool()
        self.executor.agent_state.scratchpad = {
            "research_data": {"key": "value"},
            "analysis_results": ["item1", "item2"]
        }

        # Set up task details
        self.mock_task.description = "Analyze research data and provide insights"
        self.mock_task.expected_output = "A comprehensive report with analysis and recommendations"

        unmet_criteria = ["Include specific examples", "Add recommendations"]

        prompt = self.executor._create_criteria_retry_prompt(unmet_criteria)

        # Verify prompt content with new format
        assert "VALIDATION FAILED" in prompt
        assert "YOU CANNOT PROVIDE A FINAL ANSWER YET" in prompt
        assert "ORIGINAL TASK:" in prompt
        assert "Analyze research data" in prompt
        assert "EXPECTED OUTPUT:" in prompt
        assert "comprehensive report" in prompt
        assert "Include specific examples" in prompt
        assert "Add recommendations" in prompt
        assert "Access Scratchpad Memory" in prompt
        assert "'research_data'" in prompt
        assert "'analysis_results'" in prompt
        assert "Action:" in prompt
        assert "Action Input:" in prompt
        assert "CONTINUE WITH TOOL USAGE NOW" in prompt
        assert "DO NOT ATTEMPT ANOTHER FINAL ANSWER" in prompt

    def test_create_criteria_retry_prompt_without_scratchpad(self):
        """Test retry prompt creation when no scratchpad data exists."""
        unmet_criteria = ["Add more detail"]

        prompt = self.executor._create_criteria_retry_prompt(unmet_criteria)

        assert "Add more detail" in prompt
        assert "VALIDATION FAILED" in prompt
        assert "📦 YOUR SCRATCHPAD CONTAINS DATA" not in prompt

    @patch('crewai.agents.crew_agent_executor.get_llm_response')
    @patch('crewai.agents.crew_agent_executor.process_llm_response')
    def test_invoke_loop_blocks_incomplete_final_answer(self, mock_process, mock_get_response):
        """Test that invoke loop blocks incomplete final answers."""
        # Set up conditions
        self.executor.agent_state.acceptance_criteria = ["Complete all sections"]

        # First attempt returns incomplete final answer
        incomplete_answer = AgentFinish(
            thought="Done",
            output="Exploring potential follow-up tasks!",
            text="Final Answer: Exploring potential follow-up tasks!"
        )

        # After retry, return complete answer
        complete_answer = AgentFinish(
            thought="Done with all sections",
            output="Complete output with all sections addressed",
            text="Final Answer: Complete output with all sections addressed"
        )

        # Configure mocks
        mock_process.side_effect = [incomplete_answer, complete_answer]
        mock_get_response.return_value = "response"

        # Configure validation
        self.mock_llm.call.side_effect = [
            '{"1": "NOT MET: Missing required sections"}',  # First validation fails
            '{"1": "MET"}'  # Second validation passes
        ]

        # Execute
        result = self.executor._invoke_loop()

        # Verify
        assert result == complete_answer
        assert self.mock_llm.call.call_count == 2  # Two validation attempts
        assert mock_process.call_count == 2  # Two processing attempts

        # Verify error message was shown
        self._verify_validation_messages_shown()

    def test_validation_happens_on_every_final_answer_attempt(self):
        """Test that validation happens on every AgentFinish attempt."""
        self.executor.agent_state.acceptance_criteria = ["Complete all sections"]

        # Configure LLM to always return criteria not met
        self.mock_llm.call.return_value = '{"1": "NOT MET: Missing required sections"}'

        output = "Incomplete output"

        # Validate multiple times - each should trigger validation
        for _ in range(3):
            is_valid, unmet_criteria = self.executor._validate_acceptance_criteria(output)
            assert is_valid is False
            assert len(unmet_criteria) == 1

        # Verify validation was called every time
        assert self.mock_llm.call.call_count == 3

    def _verify_validation_messages_shown(self):
        """Helper to verify validation messages were displayed."""
        print_calls = self.executor._printer.print.call_args_list

        # Check for validation message
        validation_msg_shown = any(
            "Validating acceptance criteria" in str(call)
            for call in print_calls
        )

        # Check for failure message
        failure_msg_shown = any(
            "Cannot finalize" in str(call)
            for call in print_calls
        )

        assert validation_msg_shown or failure_msg_shown
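The assertions above pin down a contract for `_validate_acceptance_criteria`: one LLM call per validation, a JSON verdict object keyed by criterion number with values "MET" or "NOT MET: <reason>", and a `(bool, unmet_criteria)` return where each unmet entry is the original criterion string. A minimal, self-contained sketch of that contract follows; it is illustrative only, since the real method lives on `CrewAgentExecutor` and the helper name and prompt wording here are assumptions:

    # Illustrative sketch only, not crewAI's actual implementation.
    import json
    from typing import List, Tuple


    def validate_acceptance_criteria(llm_call, criteria: List[str], output: str) -> Tuple[bool, List[str]]:
        """One LLM call; expects JSON mapping criterion numbers to "MET" or
        "NOT MET: <reason>"; returns (all_met, unmet_criterion_texts)."""
        numbered = "\n".join(f"{i}. {c}" for i, c in enumerate(criteria, 1))
        prompt = (
            "Evaluate the output against each acceptance criterion.\n"
            f"CRITERIA:\n{numbered}\nOUTPUT:\n{output}\n"
            'Reply with JSON, e.g. {"1": "MET", "2": "NOT MET: <reason>"}'
        )
        verdicts = json.loads(llm_call(prompt))
        # Map verdict numbers back to the original criterion strings, so the
        # retry prompt can quote the exact unmet criteria.
        unmet = [criteria[int(n) - 1] for n, v in verdicts.items() if not str(v).startswith("MET")]
        return not unmet, unmet


    # Mirrors test_validate_acceptance_criteria_some_unmet:
    fake_llm = lambda _prompt: (
        '{"1": "MET", "2": "NOT MET: Missing proper formatting", '
        '"3": "NOT MET: Analysis incomplete"}'
    )
    ok, unmet = validate_acceptance_criteria(
        fake_llm,
        ["Include all required information", "Format output properly", "Provide complete analysis"],
        "Partial output missing formatting",
    )
    assert ok is False and unmet == ["Format output properly", "Provide complete analysis"]

Returning the original criterion strings rather than the verdict numbers is what `test_create_criteria_retry_prompt_with_scratchpad` relies on when it asserts the retry prompt contains the verbatim unmet criteria.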
tests/tools/agent_tools/test_scratchpad_tool.py (new file, 137 lines)
@@ -0,0 +1,137 @@
"""Unit tests for the ScratchpadTool."""

import pytest
from crewai.tools.agent_tools.scratchpad_tool import ScratchpadTool, ScratchpadToolSchema


class TestScratchpadTool:
    """Test suite for the ScratchpadTool functionality."""

    def test_schema_description(self):
        """Test that the schema has helpful description."""
        schema = ScratchpadToolSchema
        key_field = schema.model_fields['key']

        assert "Example:" in key_field.description
        assert '{"key":' in key_field.description

    def test_empty_scratchpad_error_message(self):
        """Test error message when scratchpad is empty."""
        tool = ScratchpadTool()
        result = tool._run(key="nonexistent")

        assert "❌ SCRATCHPAD IS EMPTY" in result
        assert "does not contain any data yet" in result
        assert "Try executing other tools first" in result

    def test_key_not_found_error_message(self):
        """Test error message when key is not found."""
        tool = ScratchpadTool(scratchpad_data={
            "existing_key": "value",
            "another_key": {"data": "test"}
        })

        result = tool._run(key="wrong_key")

        assert "❌ KEY NOT FOUND: 'wrong_key'" in result
        assert "Available keys:" in result
        assert "- 'existing_key'" in result
        assert "- 'another_key'" in result
        assert 'Example Action Input: {"key": "existing_key"}' in result
        assert "Keys are case-sensitive" in result

    def test_successful_retrieval_string(self):
        """Test successful retrieval of string data."""
        tool = ScratchpadTool(scratchpad_data={
            "message": "Hello, World!"
        })

        result = tool._run(key="message")
        assert result == "Hello, World!"

    def test_successful_retrieval_dict(self):
        """Test successful retrieval of dictionary data."""
        test_dict = {"name": "John", "age": 30}
        tool = ScratchpadTool(scratchpad_data={
            "user_data": test_dict
        })

        result = tool._run(key="user_data")
        assert '"name": "John"' in result
        assert '"age": 30' in result

    def test_successful_retrieval_list(self):
        """Test successful retrieval of list data."""
        test_list = ["item1", "item2", "item3"]
        tool = ScratchpadTool(scratchpad_data={
            "items": test_list
        })

        result = tool._run(key="items")
        assert '"item1"' in result
        assert '"item2"' in result
        assert '"item3"' in result

    def test_tool_description_empty(self):
        """Test tool description when scratchpad is empty."""
        tool = ScratchpadTool()

        assert "HOW TO USE THIS TOOL:" in tool.description
        assert 'Example: {"key": "email_data"}' in tool.description
        assert "📝 STATUS: Scratchpad is currently empty" in tool.description

    def test_tool_description_with_data(self):
        """Test tool description when scratchpad has data."""
        tool = ScratchpadTool(scratchpad_data={
            "emails": ["email1@test.com", "email2@test.com"],
            "results": {"count": 5, "status": "success"},
            "api_key": "secret_key_123"
        })

        desc = tool.description

        # Check basic structure
        assert "HOW TO USE THIS TOOL:" in desc
        assert "📦 AVAILABLE DATA IN SCRATCHPAD:" in desc
        assert "💡 EXAMPLE USAGE:" in desc

        # Check key listings
        assert "📌 'emails': list of 2 items" in desc
        assert "📌 'results': dict with 2 items" in desc
        assert "📌 'api_key': string (14 chars)" in desc

        # Check example uses first key
        assert 'Action Input: {"key": "emails"}' in desc

    def test_update_scratchpad(self):
        """Test updating scratchpad data."""
        tool = ScratchpadTool()

        # Initially empty
        assert not tool.scratchpad_data

        # Update with data
        new_data = {"test": "value"}
        tool.update_scratchpad(new_data)

        assert tool.scratchpad_data == new_data
        assert "📌 'test': string (5 chars)" in tool.description

    def test_complex_data_preview(self):
        """Test preview generation for complex data structures."""
        tool = ScratchpadTool(scratchpad_data={
            "nested_dict": {
                "data": ["item1", "item2", "item3"]
            },
            "empty_list": [],
            "boolean_value": True,
            "number": 42
        })

        desc = tool.description

        # Special case for dict with 'data' key containing list
        assert "📌 'nested_dict': list of 3 items" in desc
        assert "📌 'empty_list': list of 0 items" in desc
        assert "📌 'boolean_value': bool" in desc
        assert "📌 'number': int" in desc
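Taken together, these tests document a small public surface: construct with optional `scratchpad_data`, read through `_run(key=...)`, refresh with `update_scratchpad`, and inspect the self-describing `description`. A quick usage sketch, using only the calls the tests themselves exercise, with expected outputs paraphrased from the assertions above:

    from crewai.tools.agent_tools.scratchpad_tool import ScratchpadTool

    tool = ScratchpadTool()
    assert not tool.scratchpad_data          # starts empty
    print(tool._run(key="emails"))           # "❌ SCRATCHPAD IS EMPTY" guidance message

    tool.update_scratchpad({"emails": ["a@test.com", "b@test.com"]})
    print(tool.description)                  # now advertises 📌 'emails': list of 2 items
    print(tool._run(key="emails"))           # JSON-rendered list containing "a@test.com"
    print(tool._run(key="Emails"))           # "❌ KEY NOT FOUND" (keys are case-sensitive)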