mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-30 23:02:50 +00:00
* feat: introduce PlanningConfig for enhanced agent planning capabilities This update adds a new PlanningConfig class to manage agent planning configurations, allowing for customizable planning behavior before task execution. The existing reasoning parameter is deprecated in favor of this new configuration, ensuring backward compatibility while enhancing the planning process. Additionally, the Agent class has been updated to utilize this new configuration, and relevant utility functions have been adjusted accordingly. Tests have been added to validate the new planning functionality and ensure proper integration with existing agent workflows. * dropping redundancy * fix test * revert handle_reasoning here * refactor: update reasoning handling in Agent class This commit modifies the Agent class to conditionally call the handle_reasoning function based on the executor class being used. The legacy CrewAgentExecutor will continue to utilize handle_reasoning, while the new AgentExecutor will manage planning internally. Additionally, the PlanningConfig class has been referenced in the documentation to clarify its role in enabling or disabling planning. Tests have been updated to reflect these changes and ensure proper functionality. * improve planning prompts * matching * refactor: remove default enabled flag from PlanningConfig in Agent class * more cassettes * fix test * feat: enhance agent planning with structured todo management This commit introduces a new planning system within the AgentExecutor class, allowing for the creation of structured todo items from planning steps. The TodoList and TodoItem models have been added to facilitate tracking of plan execution. The reasoning plan now includes a list of steps, improving the clarity and organization of agent tasks. Additionally, tests have been added to validate the new planning functionality and ensure proper integration with existing workflows. * refactor: update planning prompt and remove deprecated methods in reasoning handler * improve planning prompt * improve handler * linted * linted
699 lines
25 KiB
Python
699 lines
25 KiB
Python
"""Tests for structured planning with steps and todo generation.
|
|
|
|
These tests verify that the planning system correctly generates structured
|
|
PlanStep objects and converts them to TodoItems across different LLM providers.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
from unittest.mock import MagicMock, Mock, patch
|
|
|
|
import pytest
|
|
|
|
from crewai import Agent, PlanningConfig, Task
|
|
from crewai.llm import LLM
|
|
from crewai.utilities.planning_types import PlanStep, TodoItem, TodoList
|
|
from crewai.utilities.reasoning_handler import (
|
|
FUNCTION_SCHEMA,
|
|
AgentReasoning,
|
|
ReasoningPlan,
|
|
)
|
|
|
|
|
|
class TestFunctionSchema:
|
|
"""Tests for the FUNCTION_SCHEMA used in structured planning."""
|
|
|
|
def test_schema_has_required_structure(self):
|
|
"""Test that FUNCTION_SCHEMA has the correct structure."""
|
|
assert FUNCTION_SCHEMA["type"] == "function"
|
|
assert "function" in FUNCTION_SCHEMA
|
|
assert FUNCTION_SCHEMA["function"]["name"] == "create_reasoning_plan"
|
|
|
|
def test_schema_parameters_structure(self):
|
|
"""Test that parameters have correct structure."""
|
|
params = FUNCTION_SCHEMA["function"]["parameters"]
|
|
assert params["type"] == "object"
|
|
assert "properties" in params
|
|
assert "required" in params
|
|
|
|
def test_schema_has_plan_property(self):
|
|
"""Test that schema includes plan property."""
|
|
props = FUNCTION_SCHEMA["function"]["parameters"]["properties"]
|
|
assert "plan" in props
|
|
assert props["plan"]["type"] == "string"
|
|
|
|
def test_schema_has_steps_property(self):
|
|
"""Test that schema includes steps array property."""
|
|
props = FUNCTION_SCHEMA["function"]["parameters"]["properties"]
|
|
assert "steps" in props
|
|
assert props["steps"]["type"] == "array"
|
|
|
|
def test_schema_steps_items_structure(self):
|
|
"""Test that steps items have correct structure."""
|
|
items = FUNCTION_SCHEMA["function"]["parameters"]["properties"]["steps"]["items"]
|
|
assert items["type"] == "object"
|
|
assert "properties" in items
|
|
assert "required" in items
|
|
assert "additionalProperties" in items
|
|
assert items["additionalProperties"] is False
|
|
|
|
def test_schema_step_properties(self):
|
|
"""Test that step items have all required properties."""
|
|
step_props = FUNCTION_SCHEMA["function"]["parameters"]["properties"]["steps"]["items"]["properties"]
|
|
|
|
assert "step_number" in step_props
|
|
assert step_props["step_number"]["type"] == "integer"
|
|
|
|
assert "description" in step_props
|
|
assert step_props["description"]["type"] == "string"
|
|
|
|
assert "tool_to_use" in step_props
|
|
# tool_to_use should be nullable
|
|
assert step_props["tool_to_use"]["type"] == ["string", "null"]
|
|
|
|
assert "depends_on" in step_props
|
|
assert step_props["depends_on"]["type"] == "array"
|
|
|
|
def test_schema_step_required_fields(self):
|
|
"""Test that step required fields are correct."""
|
|
required = FUNCTION_SCHEMA["function"]["parameters"]["properties"]["steps"]["items"]["required"]
|
|
assert "step_number" in required
|
|
assert "description" in required
|
|
assert "tool_to_use" in required
|
|
assert "depends_on" in required
|
|
|
|
def test_schema_has_ready_property(self):
|
|
"""Test that schema includes ready property."""
|
|
props = FUNCTION_SCHEMA["function"]["parameters"]["properties"]
|
|
assert "ready" in props
|
|
assert props["ready"]["type"] == "boolean"
|
|
|
|
def test_schema_top_level_required(self):
|
|
"""Test that top-level required fields are correct."""
|
|
required = FUNCTION_SCHEMA["function"]["parameters"]["required"]
|
|
assert "plan" in required
|
|
assert "steps" in required
|
|
assert "ready" in required
|
|
|
|
def test_schema_top_level_additional_properties(self):
|
|
"""Test that additionalProperties is False at top level."""
|
|
params = FUNCTION_SCHEMA["function"]["parameters"]
|
|
assert params["additionalProperties"] is False
|
|
|
|
|
|
class TestReasoningPlan:
|
|
"""Tests for the ReasoningPlan model with structured steps."""
|
|
|
|
def test_reasoning_plan_with_empty_steps(self):
|
|
"""Test ReasoningPlan can be created with empty steps."""
|
|
plan = ReasoningPlan(
|
|
plan="Simple plan",
|
|
steps=[],
|
|
ready=True,
|
|
)
|
|
|
|
assert plan.plan == "Simple plan"
|
|
assert plan.steps == []
|
|
assert plan.ready is True
|
|
|
|
def test_reasoning_plan_with_steps(self):
|
|
"""Test ReasoningPlan with structured steps."""
|
|
steps = [
|
|
PlanStep(step_number=1, description="First step", tool_to_use="tool1"),
|
|
PlanStep(step_number=2, description="Second step", depends_on=[1]),
|
|
]
|
|
|
|
plan = ReasoningPlan(
|
|
plan="Multi-step plan",
|
|
steps=steps,
|
|
ready=True,
|
|
)
|
|
|
|
assert plan.plan == "Multi-step plan"
|
|
assert len(plan.steps) == 2
|
|
assert plan.steps[0].step_number == 1
|
|
assert plan.steps[1].depends_on == [1]
|
|
|
|
|
|
class TestAgentReasoningWithMockedLLM:
|
|
"""Tests for AgentReasoning with mocked LLM responses."""
|
|
|
|
@pytest.fixture
|
|
def mock_agent(self):
|
|
"""Create a mock agent for testing."""
|
|
agent = MagicMock()
|
|
agent.role = "Test Agent"
|
|
agent.goal = "Test goal"
|
|
agent.backstory = "Test backstory"
|
|
agent.verbose = False
|
|
agent.planning_config = PlanningConfig()
|
|
agent.i18n = MagicMock()
|
|
agent.i18n.retrieve.return_value = "Test prompt: {description}"
|
|
# Mock the llm attribute
|
|
agent.llm = MagicMock()
|
|
agent.llm.supports_function_calling.return_value = True
|
|
return agent
|
|
|
|
def test_parse_steps_from_function_response(self, mock_agent):
|
|
"""Test that steps are correctly parsed from LLM function response."""
|
|
# Mock the LLM response with structured steps
|
|
mock_response = json.dumps({
|
|
"plan": "Research and analyze",
|
|
"steps": [
|
|
{
|
|
"step_number": 1,
|
|
"description": "Search for information",
|
|
"tool_to_use": "search_tool",
|
|
"depends_on": [],
|
|
},
|
|
{
|
|
"step_number": 2,
|
|
"description": "Analyze results",
|
|
"tool_to_use": None,
|
|
"depends_on": [1],
|
|
},
|
|
],
|
|
"ready": True,
|
|
})
|
|
|
|
mock_agent.llm.call.return_value = mock_response
|
|
|
|
handler = AgentReasoning(
|
|
agent=mock_agent,
|
|
task=None,
|
|
description="Test task",
|
|
expected_output="Test output",
|
|
)
|
|
|
|
# Call the function parsing method
|
|
plan, steps, ready = handler._call_with_function(
|
|
prompt="Test prompt",
|
|
plan_type="create_plan",
|
|
)
|
|
|
|
assert plan == "Research and analyze"
|
|
assert len(steps) == 2
|
|
assert steps[0].step_number == 1
|
|
assert steps[0].tool_to_use == "search_tool"
|
|
assert steps[1].depends_on == [1]
|
|
assert ready is True
|
|
|
|
def test_parse_steps_handles_missing_optional_fields(self, mock_agent):
|
|
"""Test that missing optional fields are handled correctly."""
|
|
mock_response = json.dumps({
|
|
"plan": "Simple plan",
|
|
"steps": [
|
|
{
|
|
"step_number": 1,
|
|
"description": "Do something",
|
|
"tool_to_use": None,
|
|
"depends_on": [],
|
|
},
|
|
],
|
|
"ready": True,
|
|
})
|
|
|
|
mock_agent.llm.call.return_value = mock_response
|
|
|
|
handler = AgentReasoning(
|
|
agent=mock_agent,
|
|
task=None,
|
|
description="Test task",
|
|
expected_output="Test output",
|
|
)
|
|
|
|
plan, steps, ready = handler._call_with_function(
|
|
prompt="Test prompt",
|
|
plan_type="create_plan",
|
|
)
|
|
|
|
assert len(steps) == 1
|
|
assert steps[0].tool_to_use is None
|
|
assert steps[0].depends_on == []
|
|
|
|
def test_parse_steps_with_missing_fields_uses_defaults(self, mock_agent):
|
|
"""Test that steps with missing fields get default values."""
|
|
mock_response = json.dumps({
|
|
"plan": "Plan with step missing fields",
|
|
"steps": [
|
|
{"step_number": 1, "description": "Valid step", "tool_to_use": None, "depends_on": []},
|
|
{"step_number": 2}, # Missing description, tool_to_use, depends_on
|
|
{"step_number": 3, "description": "Another valid", "tool_to_use": None, "depends_on": []},
|
|
],
|
|
"ready": True,
|
|
})
|
|
|
|
mock_agent.llm.call.return_value = mock_response
|
|
|
|
handler = AgentReasoning(
|
|
agent=mock_agent,
|
|
task=None,
|
|
description="Test task",
|
|
expected_output="Test output",
|
|
)
|
|
|
|
plan, steps, ready = handler._call_with_function(
|
|
prompt="Test prompt",
|
|
plan_type="create_plan",
|
|
)
|
|
|
|
# All 3 steps should be parsed, with defaults for missing fields
|
|
assert len(steps) == 3
|
|
assert steps[0].step_number == 1
|
|
assert steps[0].description == "Valid step"
|
|
assert steps[1].step_number == 2
|
|
assert steps[1].description == "" # Default value
|
|
assert steps[2].step_number == 3
|
|
|
|
|
|
class TestTodoCreationFromPlan:
|
|
"""Tests for converting plan steps to todo items."""
|
|
|
|
def test_create_todos_from_plan_steps(self):
|
|
"""Test creating TodoList from PlanSteps."""
|
|
steps = [
|
|
PlanStep(
|
|
step_number=1,
|
|
description="Research competitors",
|
|
tool_to_use="search_tool",
|
|
depends_on=[],
|
|
),
|
|
PlanStep(
|
|
step_number=2,
|
|
description="Analyze data",
|
|
tool_to_use=None,
|
|
depends_on=[1],
|
|
),
|
|
PlanStep(
|
|
step_number=3,
|
|
description="Generate report",
|
|
tool_to_use="write_tool",
|
|
depends_on=[1, 2],
|
|
),
|
|
]
|
|
|
|
# Convert steps to todos (mirroring agent_executor._create_todos_from_plan)
|
|
todos = []
|
|
for step in steps:
|
|
todo = TodoItem(
|
|
step_number=step.step_number,
|
|
description=step.description,
|
|
tool_to_use=step.tool_to_use,
|
|
depends_on=step.depends_on,
|
|
status="pending",
|
|
)
|
|
todos.append(todo)
|
|
|
|
todo_list = TodoList(items=todos)
|
|
|
|
assert len(todo_list.items) == 3
|
|
assert todo_list.pending_count == 3
|
|
assert todo_list.completed_count == 0
|
|
|
|
# Verify todo properties match step properties
|
|
assert todo_list.items[0].description == "Research competitors"
|
|
assert todo_list.items[0].tool_to_use == "search_tool"
|
|
assert todo_list.items[1].depends_on == [1]
|
|
assert todo_list.items[2].depends_on == [1, 2]
|
|
|
|
|
|
# =============================================================================
|
|
# Provider-Specific Integration Tests (VCR recorded)
|
|
# =============================================================================
|
|
|
|
|
|
# Common test tools used across provider tests
|
|
def create_research_tools():
|
|
"""Create research tools for testing structured planning."""
|
|
from crewai.tools import tool
|
|
|
|
@tool
|
|
def web_search(query: str) -> str:
|
|
"""Search the web for information on a given topic.
|
|
|
|
Args:
|
|
query: The search query to look up.
|
|
|
|
Returns:
|
|
Search results as a string.
|
|
"""
|
|
# Simulated search results for testing
|
|
return f"Search results for '{query}': Found 3 relevant articles about the topic including market analysis, competitor data, and industry trends."
|
|
|
|
@tool
|
|
def read_website(url: str) -> str:
|
|
"""Read and extract content from a website URL.
|
|
|
|
Args:
|
|
url: The URL of the website to read.
|
|
|
|
Returns:
|
|
The extracted content from the website.
|
|
"""
|
|
# Simulated website content for testing
|
|
return f"Content from {url}: This article discusses key insights about the topic including market size ($50B), growth rate (15% YoY), and major players in the industry."
|
|
|
|
@tool
|
|
def generate_report(title: str, findings: str) -> str:
|
|
"""Generate a structured report based on research findings.
|
|
|
|
Args:
|
|
title: The title of the report.
|
|
findings: The research findings to include.
|
|
|
|
Returns:
|
|
A formatted report string.
|
|
"""
|
|
return f"# {title}\n\n## Executive Summary\n{findings}\n\n## Conclusion\nBased on the analysis, the market shows strong growth potential."
|
|
|
|
return web_search, read_website, generate_report
|
|
|
|
|
|
RESEARCH_TASK = """Research the current state of the AI agent market:
|
|
1. Search for recent information about AI agents and their market trends
|
|
2. Read detailed content from a relevant industry source
|
|
3. Generate a brief report summarizing the key findings
|
|
|
|
Use the available tools for each step."""
|
|
|
|
|
|
class TestOpenAIStructuredPlanning:
|
|
"""Integration tests for OpenAI structured planning with research workflow."""
|
|
|
|
@pytest.mark.vcr()
|
|
def test_openai_research_workflow_generates_steps(self):
|
|
"""Test that OpenAI generates structured plan steps for a research task."""
|
|
web_search, read_website, generate_report = create_research_tools()
|
|
llm = LLM(model="gpt-4o")
|
|
|
|
agent = Agent(
|
|
role="Research Analyst",
|
|
goal="Conduct thorough research and produce insightful reports",
|
|
backstory="An experienced analyst skilled at gathering information and synthesizing findings into actionable insights.",
|
|
llm=llm,
|
|
tools=[web_search, read_website, generate_report],
|
|
planning_config=PlanningConfig(max_attempts=1),
|
|
verbose=False,
|
|
)
|
|
|
|
result = agent.kickoff(RESEARCH_TASK)
|
|
|
|
# Verify result exists
|
|
assert result is not None
|
|
assert result.raw is not None
|
|
# The result should contain some report-like content
|
|
assert len(str(result.raw)) > 50
|
|
|
|
|
|
class TestAnthropicStructuredPlanning:
|
|
"""Integration tests for Anthropic structured planning with research workflow."""
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def mock_anthropic_api_key(self):
|
|
"""Mock API key if not set."""
|
|
if "ANTHROPIC_API_KEY" not in os.environ:
|
|
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
|
|
yield
|
|
else:
|
|
yield
|
|
|
|
@pytest.mark.vcr()
|
|
def test_anthropic_research_workflow_generates_steps(self):
|
|
"""Test that Anthropic generates structured plan steps for a research task."""
|
|
web_search, read_website, generate_report = create_research_tools()
|
|
llm = LLM(model="anthropic/claude-sonnet-4-20250514")
|
|
|
|
agent = Agent(
|
|
role="Research Analyst",
|
|
goal="Conduct thorough research and produce insightful reports",
|
|
backstory="An experienced analyst skilled at gathering information and synthesizing findings into actionable insights.",
|
|
llm=llm,
|
|
tools=[web_search, read_website, generate_report],
|
|
planning_config=PlanningConfig(max_attempts=1),
|
|
verbose=False,
|
|
)
|
|
|
|
result = agent.kickoff(RESEARCH_TASK)
|
|
|
|
# Verify result exists
|
|
assert result is not None
|
|
assert result.raw is not None
|
|
# The result should contain some report-like content
|
|
assert len(str(result.raw)) > 50
|
|
|
|
|
|
class TestGeminiStructuredPlanning:
|
|
"""Integration tests for Google Gemini structured planning with research workflow."""
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def mock_google_api_key(self):
|
|
"""Mock API key if not set."""
|
|
if "GOOGLE_API_KEY" not in os.environ and "GEMINI_API_KEY" not in os.environ:
|
|
with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}):
|
|
yield
|
|
else:
|
|
yield
|
|
|
|
@pytest.mark.vcr()
|
|
def test_gemini_research_workflow_generates_steps(self):
|
|
"""Test that Gemini generates structured plan steps for a research task."""
|
|
web_search, read_website, generate_report = create_research_tools()
|
|
llm = LLM(model="gemini/gemini-2.5-flash")
|
|
|
|
agent = Agent(
|
|
role="Research Analyst",
|
|
goal="Conduct thorough research and produce insightful reports",
|
|
backstory="An experienced analyst skilled at gathering information and synthesizing findings into actionable insights.",
|
|
llm=llm,
|
|
tools=[web_search, read_website, generate_report],
|
|
planning_config=PlanningConfig(max_attempts=1),
|
|
verbose=False,
|
|
)
|
|
|
|
result = agent.kickoff(RESEARCH_TASK)
|
|
|
|
# Verify result exists
|
|
assert result is not None
|
|
assert result.raw is not None
|
|
# The result should contain some report-like content
|
|
assert len(str(result.raw)) > 50
|
|
|
|
|
|
class TestAzureStructuredPlanning:
|
|
"""Integration tests for Azure OpenAI structured planning with research workflow."""
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def mock_azure_credentials(self):
|
|
"""Mock Azure credentials for tests."""
|
|
if "AZURE_API_KEY" not in os.environ:
|
|
with patch.dict(os.environ, {
|
|
"AZURE_API_KEY": "test-key",
|
|
"AZURE_ENDPOINT": "https://test.openai.azure.com"
|
|
}):
|
|
yield
|
|
else:
|
|
yield
|
|
|
|
@pytest.mark.vcr()
|
|
def test_azure_research_workflow_generates_steps(self):
|
|
"""Test that Azure OpenAI generates structured plan steps for a research task."""
|
|
web_search, read_website, generate_report = create_research_tools()
|
|
llm = LLM(model="azure/gpt-4o")
|
|
|
|
agent = Agent(
|
|
role="Research Analyst",
|
|
goal="Conduct thorough research and produce insightful reports",
|
|
backstory="An experienced analyst skilled at gathering information and synthesizing findings into actionable insights.",
|
|
llm=llm,
|
|
tools=[web_search, read_website, generate_report],
|
|
planning_config=PlanningConfig(max_attempts=1),
|
|
verbose=False,
|
|
)
|
|
|
|
result = agent.kickoff(RESEARCH_TASK)
|
|
|
|
# Verify result exists
|
|
assert result is not None
|
|
assert result.raw is not None
|
|
# The result should contain some report-like content
|
|
assert len(str(result.raw)) > 50
|
|
|
|
|
|
# =============================================================================
|
|
# Unit Tests with Mocked LLM Providers
|
|
# =============================================================================
|
|
|
|
|
|
class TestStructuredPlanningWithMockedProviders:
|
|
"""Unit tests with mocked LLM providers for faster execution."""
|
|
|
|
def _create_mock_plan_response(self, steps_data):
|
|
"""Helper to create mock plan response."""
|
|
return json.dumps({
|
|
"plan": "Test plan",
|
|
"steps": steps_data,
|
|
"ready": True,
|
|
})
|
|
|
|
def test_openai_mock_structured_response(self):
|
|
"""Test parsing OpenAI structured response."""
|
|
steps_data = [
|
|
{"step_number": 1, "description": "Search", "tool_to_use": "search", "depends_on": []},
|
|
{"step_number": 2, "description": "Analyze", "tool_to_use": None, "depends_on": [1]},
|
|
]
|
|
|
|
response = self._create_mock_plan_response(steps_data)
|
|
parsed = json.loads(response)
|
|
|
|
assert len(parsed["steps"]) == 2
|
|
assert parsed["steps"][0]["tool_to_use"] == "search"
|
|
assert parsed["steps"][1]["depends_on"] == [1]
|
|
|
|
def test_anthropic_mock_structured_response(self):
|
|
"""Test parsing Anthropic structured response (same format)."""
|
|
steps_data = [
|
|
{"step_number": 1, "description": "Research", "tool_to_use": "web_search", "depends_on": []},
|
|
{"step_number": 2, "description": "Summarize", "tool_to_use": None, "depends_on": [1]},
|
|
{"step_number": 3, "description": "Report", "tool_to_use": "write_file", "depends_on": [1, 2]},
|
|
]
|
|
|
|
response = self._create_mock_plan_response(steps_data)
|
|
parsed = json.loads(response)
|
|
|
|
assert len(parsed["steps"]) == 3
|
|
assert parsed["steps"][2]["depends_on"] == [1, 2]
|
|
|
|
def test_gemini_mock_structured_response(self):
|
|
"""Test parsing Gemini structured response (same format)."""
|
|
steps_data = [
|
|
{"step_number": 1, "description": "Gather data", "tool_to_use": "data_tool", "depends_on": []},
|
|
{"step_number": 2, "description": "Process", "tool_to_use": None, "depends_on": [1]},
|
|
]
|
|
|
|
response = self._create_mock_plan_response(steps_data)
|
|
parsed = json.loads(response)
|
|
|
|
assert len(parsed["steps"]) == 2
|
|
assert parsed["ready"] is True
|
|
|
|
def test_azure_mock_structured_response(self):
|
|
"""Test parsing Azure OpenAI structured response (same format as OpenAI)."""
|
|
steps_data = [
|
|
{"step_number": 1, "description": "Initialize", "tool_to_use": None, "depends_on": []},
|
|
{"step_number": 2, "description": "Execute", "tool_to_use": "executor", "depends_on": [1]},
|
|
{"step_number": 3, "description": "Finalize", "tool_to_use": None, "depends_on": [1, 2]},
|
|
]
|
|
|
|
response = self._create_mock_plan_response(steps_data)
|
|
parsed = json.loads(response)
|
|
|
|
assert len(parsed["steps"]) == 3
|
|
assert parsed["steps"][0]["tool_to_use"] is None
|
|
|
|
|
|
class TestTodoListIntegration:
|
|
"""Integration tests for TodoList with plan execution simulation."""
|
|
|
|
def test_full_plan_execution_workflow(self):
|
|
"""Test complete workflow from plan to todos to execution."""
|
|
# Simulate plan steps from LLM
|
|
plan_steps = [
|
|
PlanStep(
|
|
step_number=1,
|
|
description="Research the topic",
|
|
tool_to_use="search_tool",
|
|
depends_on=[],
|
|
),
|
|
PlanStep(
|
|
step_number=2,
|
|
description="Compile findings",
|
|
tool_to_use=None,
|
|
depends_on=[1],
|
|
),
|
|
PlanStep(
|
|
step_number=3,
|
|
description="Generate summary",
|
|
tool_to_use="summarize_tool",
|
|
depends_on=[1, 2],
|
|
),
|
|
]
|
|
|
|
# Convert to todos (like agent_executor._create_todos_from_plan)
|
|
todos = [
|
|
TodoItem(
|
|
step_number=step.step_number,
|
|
description=step.description,
|
|
tool_to_use=step.tool_to_use,
|
|
depends_on=step.depends_on,
|
|
status="pending",
|
|
)
|
|
for step in plan_steps
|
|
]
|
|
todo_list = TodoList(items=todos)
|
|
|
|
# Verify initial state
|
|
assert todo_list.pending_count == 3
|
|
assert todo_list.is_complete is False
|
|
|
|
# Simulate execution
|
|
for i in range(1, 4):
|
|
todo_list.mark_running(i)
|
|
assert todo_list.current_todo.step_number == i
|
|
todo_list.mark_completed(i, result=f"Step {i} completed")
|
|
|
|
# Verify final state
|
|
assert todo_list.is_complete is True
|
|
assert todo_list.completed_count == 3
|
|
assert all(item.result is not None for item in todo_list.items)
|
|
|
|
def test_dependency_aware_execution(self):
|
|
"""Test that dependencies are respected in execution order."""
|
|
steps = [
|
|
PlanStep(step_number=1, description="Base step", depends_on=[]),
|
|
PlanStep(step_number=2, description="Depends on 1", depends_on=[1]),
|
|
PlanStep(step_number=3, description="Depends on 1", depends_on=[1]),
|
|
PlanStep(step_number=4, description="Depends on 2 and 3", depends_on=[2, 3]),
|
|
]
|
|
|
|
todos = [
|
|
TodoItem(
|
|
step_number=s.step_number,
|
|
description=s.description,
|
|
depends_on=s.depends_on,
|
|
)
|
|
for s in steps
|
|
]
|
|
todo_list = TodoList(items=todos)
|
|
|
|
# Helper to check if dependencies are satisfied
|
|
def can_execute(todo: TodoItem) -> bool:
|
|
for dep in todo.depends_on:
|
|
dep_todo = todo_list.get_by_step_number(dep)
|
|
if dep_todo and dep_todo.status != "completed":
|
|
return False
|
|
return True
|
|
|
|
# Step 1 has no dependencies
|
|
assert can_execute(todo_list.items[0]) is True
|
|
|
|
# Steps 2 and 3 depend on 1 (not yet done)
|
|
assert can_execute(todo_list.items[1]) is False
|
|
assert can_execute(todo_list.items[2]) is False
|
|
|
|
# Complete step 1
|
|
todo_list.mark_completed(1)
|
|
|
|
# Now steps 2 and 3 can execute
|
|
assert can_execute(todo_list.items[1]) is True
|
|
assert can_execute(todo_list.items[2]) is True
|
|
|
|
# Step 4 still can't (depends on 2 and 3)
|
|
assert can_execute(todo_list.items[3]) is False
|
|
|
|
# Complete steps 2 and 3
|
|
todo_list.mark_completed(2)
|
|
todo_list.mark_completed(3)
|
|
|
|
# Now step 4 can execute
|
|
assert can_execute(todo_list.items[3]) is True
|