merged lorenze/enh-decouple-executor-from-crew

This commit is contained in:
lorenzejay
2026-01-20 10:41:28 -08:00
19 changed files with 2804 additions and 1317 deletions

View File

@@ -1,4 +1,4 @@
"""Unit tests for CrewAgentExecutorFlow.
"""Unit tests for AgentExecutor.
Tests the Flow-based agent executor implementation including state management,
flow methods, routing logic, and error handling.
@@ -8,9 +8,9 @@ from unittest.mock import Mock, patch
import pytest
from crewai.experimental.crew_agent_executor_flow import (
from crewai.experimental.agent_executor import (
AgentReActState,
CrewAgentExecutorFlow,
AgentExecutor,
)
from crewai.agents.parser import AgentAction, AgentFinish
@@ -43,8 +43,8 @@ class TestAgentReActState:
assert state.ask_for_human_input is True
class TestCrewAgentExecutorFlow:
"""Test CrewAgentExecutorFlow class."""
class TestAgentExecutor:
"""Test AgentExecutor class."""
@pytest.fixture
def mock_dependencies(self):
@@ -87,8 +87,8 @@ class TestCrewAgentExecutorFlow:
}
def test_executor_initialization(self, mock_dependencies):
"""Test CrewAgentExecutorFlow initialization."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
"""Test AgentExecutor initialization."""
executor = AgentExecutor(**mock_dependencies)
assert executor.llm == mock_dependencies["llm"]
assert executor.task == mock_dependencies["task"]
@@ -100,9 +100,9 @@ class TestCrewAgentExecutorFlow:
def test_initialize_reasoning(self, mock_dependencies):
"""Test flow entry point."""
with patch.object(
CrewAgentExecutorFlow, "_show_start_logs"
AgentExecutor, "_show_start_logs"
) as mock_show_start:
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
result = executor.initialize_reasoning()
assert result == "initialized"
@@ -110,7 +110,7 @@ class TestCrewAgentExecutorFlow:
def test_check_max_iterations_not_reached(self, mock_dependencies):
"""Test routing when iterations < max."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor.state.iterations = 5
result = executor.check_max_iterations()
@@ -118,7 +118,7 @@ class TestCrewAgentExecutorFlow:
def test_check_max_iterations_reached(self, mock_dependencies):
"""Test routing when iterations >= max."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor.state.iterations = 10
result = executor.check_max_iterations()
@@ -126,7 +126,7 @@ class TestCrewAgentExecutorFlow:
def test_route_by_answer_type_action(self, mock_dependencies):
"""Test routing for AgentAction."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor.state.current_answer = AgentAction(
thought="thinking", tool="search", tool_input="query", text="action text"
)
@@ -136,7 +136,7 @@ class TestCrewAgentExecutorFlow:
def test_route_by_answer_type_finish(self, mock_dependencies):
"""Test routing for AgentFinish."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor.state.current_answer = AgentFinish(
thought="final thoughts", output="Final answer", text="complete"
)
@@ -146,7 +146,7 @@ class TestCrewAgentExecutorFlow:
def test_continue_iteration(self, mock_dependencies):
"""Test iteration continuation."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
result = executor.continue_iteration()
@@ -154,8 +154,8 @@ class TestCrewAgentExecutorFlow:
def test_finalize_success(self, mock_dependencies):
"""Test finalize with valid AgentFinish."""
with patch.object(CrewAgentExecutorFlow, "_show_logs") as mock_show_logs:
executor = CrewAgentExecutorFlow(**mock_dependencies)
with patch.object(AgentExecutor, "_show_logs") as mock_show_logs:
executor = AgentExecutor(**mock_dependencies)
executor.state.current_answer = AgentFinish(
thought="final thinking", output="Done", text="complete"
)
@@ -168,7 +168,7 @@ class TestCrewAgentExecutorFlow:
def test_finalize_failure(self, mock_dependencies):
"""Test finalize skips when given AgentAction instead of AgentFinish."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor.state.current_answer = AgentAction(
thought="thinking", tool="search", tool_input="query", text="action text"
)
@@ -181,7 +181,7 @@ class TestCrewAgentExecutorFlow:
def test_format_prompt(self, mock_dependencies):
"""Test prompt formatting."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
inputs = {"input": "test input", "tool_names": "tool1, tool2", "tools": "desc"}
result = executor._format_prompt("Prompt {input} {tool_names} {tools}", inputs)
@@ -192,18 +192,18 @@ class TestCrewAgentExecutorFlow:
def test_is_training_mode_false(self, mock_dependencies):
"""Test training mode detection when not in training."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
assert executor._is_training_mode() is False
def test_is_training_mode_true(self, mock_dependencies):
"""Test training mode detection when in training."""
mock_dependencies["crew"]._train = True
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
assert executor._is_training_mode() is True
def test_append_message_to_state(self, mock_dependencies):
"""Test message appending to state."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
initial_count = len(executor.state.messages)
executor._append_message_to_state("test message")
@@ -216,7 +216,7 @@ class TestCrewAgentExecutorFlow:
callback = Mock()
mock_dependencies["step_callback"] = callback
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
answer = AgentFinish(thought="thinking", output="test", text="final")
executor._invoke_step_callback(answer)
@@ -226,14 +226,14 @@ class TestCrewAgentExecutorFlow:
def test_invoke_step_callback_none(self, mock_dependencies):
"""Test step callback when none provided."""
mock_dependencies["step_callback"] = None
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
# Should not raise error
executor._invoke_step_callback(
AgentFinish(thought="thinking", output="test", text="final")
)
@patch("crewai.experimental.crew_agent_executor_flow.handle_output_parser_exception")
@patch("crewai.experimental.agent_executor.handle_output_parser_exception")
def test_recover_from_parser_error(
self, mock_handle_exception, mock_dependencies
):
@@ -242,7 +242,7 @@ class TestCrewAgentExecutorFlow:
mock_handle_exception.return_value = None
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor._last_parser_error = OutputParserError("test error")
initial_iterations = executor.state.iterations
@@ -252,12 +252,12 @@ class TestCrewAgentExecutorFlow:
assert executor.state.iterations == initial_iterations + 1
mock_handle_exception.assert_called_once()
@patch("crewai.experimental.crew_agent_executor_flow.handle_context_length")
@patch("crewai.experimental.agent_executor.handle_context_length")
def test_recover_from_context_length(
self, mock_handle_context, mock_dependencies
):
"""Test recovery from context length error."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor._last_context_error = Exception("context too long")
initial_iterations = executor.state.iterations
@@ -270,16 +270,16 @@ class TestCrewAgentExecutorFlow:
def test_use_stop_words_property(self, mock_dependencies):
"""Test use_stop_words property."""
mock_dependencies["llm"].supports_stop_words.return_value = True
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
assert executor.use_stop_words is True
mock_dependencies["llm"].supports_stop_words.return_value = False
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
assert executor.use_stop_words is False
def test_compatibility_properties(self, mock_dependencies):
"""Test compatibility properties for mixin."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor.state.messages = [{"role": "user", "content": "test"}]
executor.state.iterations = 5
@@ -321,8 +321,8 @@ class TestFlowErrorHandling:
"tools_handler": Mock(),
}
@patch("crewai.experimental.crew_agent_executor_flow.get_llm_response")
@patch("crewai.experimental.crew_agent_executor_flow.enforce_rpm_limit")
@patch("crewai.experimental.agent_executor.get_llm_response")
@patch("crewai.experimental.agent_executor.enforce_rpm_limit")
def test_call_llm_parser_error(
self, mock_enforce_rpm, mock_get_llm, mock_dependencies
):
@@ -332,15 +332,15 @@ class TestFlowErrorHandling:
mock_enforce_rpm.return_value = None
mock_get_llm.side_effect = OutputParserError("parse failed")
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
result = executor.call_llm_and_parse()
assert result == "parser_error"
assert executor._last_parser_error is not None
@patch("crewai.experimental.crew_agent_executor_flow.get_llm_response")
@patch("crewai.experimental.crew_agent_executor_flow.enforce_rpm_limit")
@patch("crewai.experimental.crew_agent_executor_flow.is_context_length_exceeded")
@patch("crewai.experimental.agent_executor.get_llm_response")
@patch("crewai.experimental.agent_executor.enforce_rpm_limit")
@patch("crewai.experimental.agent_executor.is_context_length_exceeded")
def test_call_llm_context_error(
self,
mock_is_context_exceeded,
@@ -353,7 +353,7 @@ class TestFlowErrorHandling:
mock_get_llm.side_effect = Exception("context length")
mock_is_context_exceeded.return_value = True
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
result = executor.call_llm_and_parse()
assert result == "context_error"
@@ -397,10 +397,10 @@ class TestFlowInvoke:
"tools_handler": Mock(),
}
@patch.object(CrewAgentExecutorFlow, "kickoff")
@patch.object(CrewAgentExecutorFlow, "_create_short_term_memory")
@patch.object(CrewAgentExecutorFlow, "_create_long_term_memory")
@patch.object(CrewAgentExecutorFlow, "_create_external_memory")
@patch.object(AgentExecutor, "kickoff")
@patch.object(AgentExecutor, "_create_short_term_memory")
@patch.object(AgentExecutor, "_create_long_term_memory")
@patch.object(AgentExecutor, "_create_external_memory")
def test_invoke_success(
self,
mock_external_memory,
@@ -410,7 +410,7 @@ class TestFlowInvoke:
mock_dependencies,
):
"""Test successful invoke without human feedback."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
# Mock kickoff to set the final answer in state
def mock_kickoff_side_effect():
@@ -429,10 +429,10 @@ class TestFlowInvoke:
mock_long_term_memory.assert_called_once()
mock_external_memory.assert_called_once()
@patch.object(CrewAgentExecutorFlow, "kickoff")
@patch.object(AgentExecutor, "kickoff")
def test_invoke_failure_no_agent_finish(self, mock_kickoff, mock_dependencies):
"""Test invoke fails without AgentFinish."""
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
executor.state.current_answer = AgentAction(
thought="thinking", tool="test", tool_input="test", text="action text"
)
@@ -442,10 +442,10 @@ class TestFlowInvoke:
with pytest.raises(RuntimeError, match="without reaching a final answer"):
executor.invoke(inputs)
@patch.object(CrewAgentExecutorFlow, "kickoff")
@patch.object(CrewAgentExecutorFlow, "_create_short_term_memory")
@patch.object(CrewAgentExecutorFlow, "_create_long_term_memory")
@patch.object(CrewAgentExecutorFlow, "_create_external_memory")
@patch.object(AgentExecutor, "kickoff")
@patch.object(AgentExecutor, "_create_short_term_memory")
@patch.object(AgentExecutor, "_create_long_term_memory")
@patch.object(AgentExecutor, "_create_external_memory")
def test_invoke_with_system_prompt(
self,
mock_external_memory,
@@ -459,7 +459,7 @@ class TestFlowInvoke:
"system": "System: {input}",
"user": "User: {input} {tool_names} {tools}",
}
executor = CrewAgentExecutorFlow(**mock_dependencies)
executor = AgentExecutor(**mock_dependencies)
def mock_kickoff_side_effect():
executor.state.current_answer = AgentFinish(

View File

@@ -72,62 +72,53 @@ class ResearchResult(BaseModel):
@pytest.mark.vcr()
@pytest.mark.parametrize("verbose", [True, False])
def test_lite_agent_created_with_correct_parameters(monkeypatch, verbose):
"""Test that LiteAgent is created with the correct parameters when Agent.kickoff() is called."""
def test_agent_kickoff_preserves_parameters(verbose):
"""Test that Agent.kickoff() uses the correct parameters from the Agent."""
# Create a test agent with specific parameters
llm = LLM(model="gpt-4o-mini")
mock_llm = Mock(spec=LLM)
mock_llm.call.return_value = "Final Answer: Test response"
mock_llm.stop = []
from crewai.types.usage_metrics import UsageMetrics
mock_usage_metrics = UsageMetrics(
total_tokens=100,
prompt_tokens=50,
completion_tokens=50,
cached_prompt_tokens=0,
successful_requests=1,
)
mock_llm.get_token_usage_summary.return_value = mock_usage_metrics
custom_tools = [WebSearchTool(), CalculatorTool()]
max_iter = 10
max_execution_time = 300
agent = Agent(
role="Test Agent",
goal="Test Goal",
backstory="Test Backstory",
llm=llm,
llm=mock_llm,
tools=custom_tools,
max_iter=max_iter,
max_execution_time=max_execution_time,
verbose=verbose,
)
# Create a mock to capture the created LiteAgent
created_lite_agent = None
original_lite_agent = LiteAgent
# Call kickoff and verify it works
result = agent.kickoff("Test query")
# Define a mock LiteAgent class that captures its arguments
class MockLiteAgent(original_lite_agent):
def __init__(self, **kwargs):
nonlocal created_lite_agent
created_lite_agent = kwargs
super().__init__(**kwargs)
# Verify the agent was configured correctly
assert agent.role == "Test Agent"
assert agent.goal == "Test Goal"
assert agent.backstory == "Test Backstory"
assert len(agent.tools) == 2
assert isinstance(agent.tools[0], WebSearchTool)
assert isinstance(agent.tools[1], CalculatorTool)
assert agent.max_iter == max_iter
assert agent.verbose == verbose
# Patch the LiteAgent class
monkeypatch.setattr("crewai.agent.core.LiteAgent", MockLiteAgent)
# Call kickoff to create the LiteAgent
agent.kickoff("Test query")
# Verify all parameters were passed correctly
assert created_lite_agent is not None
assert created_lite_agent["role"] == "Test Agent"
assert created_lite_agent["goal"] == "Test Goal"
assert created_lite_agent["backstory"] == "Test Backstory"
assert created_lite_agent["llm"] == llm
assert len(created_lite_agent["tools"]) == 2
assert isinstance(created_lite_agent["tools"][0], WebSearchTool)
assert isinstance(created_lite_agent["tools"][1], CalculatorTool)
assert created_lite_agent["max_iterations"] == max_iter
assert created_lite_agent["max_execution_time"] == max_execution_time
assert created_lite_agent["verbose"] == verbose
assert created_lite_agent["response_format"] is None
# Test with a response_format
class TestResponse(BaseModel):
test_field: str
agent.kickoff("Test query", response_format=TestResponse)
assert created_lite_agent["response_format"] == TestResponse
# Verify kickoff returned a result
assert result is not None
assert result.raw is not None
@pytest.mark.vcr()
@@ -310,7 +301,8 @@ def verify_agent_parent_flow(result, agent, flow):
def test_sets_parent_flow_when_inside_flow():
captured_agent = None
"""Test that an Agent can be created and executed inside a Flow context."""
captured_event = None
mock_llm = Mock(spec=LLM)
mock_llm.call.return_value = "Test response"
@@ -343,15 +335,17 @@ def test_sets_parent_flow_when_inside_flow():
event_received = threading.Event()
@crewai_event_bus.on(LiteAgentExecutionStartedEvent)
def capture_agent(source, event):
nonlocal captured_agent
captured_agent = source
def capture_event(source, event):
nonlocal captured_event
captured_event = event
event_received.set()
flow.kickoff()
result = flow.kickoff()
assert event_received.wait(timeout=5), "Timeout waiting for agent execution event"
assert captured_agent.parent_flow is flow
assert captured_event is not None
assert captured_event.agent_info["role"] == "Test Agent"
assert result is not None
@pytest.mark.vcr()
@@ -373,16 +367,14 @@ def test_guardrail_is_called_using_string():
@crewai_event_bus.on(LLMGuardrailStartedEvent)
def capture_guardrail_started(source, event):
assert isinstance(source, LiteAgent)
assert source.original_agent == agent
assert isinstance(source, Agent)
with condition:
guardrail_events["started"].append(event)
condition.notify()
@crewai_event_bus.on(LLMGuardrailCompletedEvent)
def capture_guardrail_completed(source, event):
assert isinstance(source, LiteAgent)
assert source.original_agent == agent
assert isinstance(source, Agent)
with condition:
guardrail_events["completed"].append(event)
condition.notify()
@@ -683,3 +675,151 @@ def test_agent_kickoff_with_mcp_tools(mock_get_mcp_tools):
# Verify MCP tools were retrieved
mock_get_mcp_tools.assert_called_once_with("https://mcp.exa.ai/mcp?api_key=test_exa_key&profile=research")
# ============================================================================
# Tests for LiteAgent inside Flow (magic auto-async pattern)
# ============================================================================
from crewai.flow.flow import listen
@pytest.mark.vcr()
def test_lite_agent_inside_flow_sync():
"""Test that LiteAgent.kickoff() works magically inside a Flow.
This tests the "magic auto-async" pattern where calling agent.kickoff()
from within a Flow automatically detects the event loop and returns a
coroutine that the Flow framework awaits. Users don't need to use async/await.
"""
# Track execution
execution_log = []
class TestFlow(Flow):
@start()
def run_agent(self):
execution_log.append("flow_started")
agent = Agent(
role="Test Agent",
goal="Answer questions",
backstory="A helpful test assistant",
llm=LLM(model="gpt-4o-mini"),
verbose=False,
)
# Magic: just call kickoff() normally - it auto-detects Flow context
result = agent.kickoff(messages="What is 2+2? Reply with just the number.")
execution_log.append("agent_completed")
return result
flow = TestFlow()
result = flow.kickoff()
# Verify the flow executed successfully
assert "flow_started" in execution_log
assert "agent_completed" in execution_log
assert result is not None
assert isinstance(result, LiteAgentOutput)
@pytest.mark.vcr()
def test_lite_agent_inside_flow_with_tools():
"""Test that LiteAgent with tools works correctly inside a Flow."""
class TestFlow(Flow):
@start()
def run_agent_with_tools(self):
agent = Agent(
role="Calculator Agent",
goal="Perform calculations",
backstory="A math expert",
llm=LLM(model="gpt-4o-mini"),
tools=[CalculatorTool()],
verbose=False,
)
result = agent.kickoff(messages="Calculate 10 * 5")
return result
flow = TestFlow()
result = flow.kickoff()
assert result is not None
assert isinstance(result, LiteAgentOutput)
assert result.raw is not None
@pytest.mark.vcr()
def test_multiple_agents_in_same_flow():
"""Test that multiple LiteAgents can run sequentially in the same Flow."""
class MultiAgentFlow(Flow):
@start()
def first_step(self):
agent1 = Agent(
role="First Agent",
goal="Greet users",
backstory="A friendly greeter",
llm=LLM(model="gpt-4o-mini"),
verbose=False,
)
return agent1.kickoff(messages="Say hello")
@listen(first_step)
def second_step(self, first_result):
agent2 = Agent(
role="Second Agent",
goal="Say goodbye",
backstory="A polite farewell agent",
llm=LLM(model="gpt-4o-mini"),
verbose=False,
)
return agent2.kickoff(messages="Say goodbye")
flow = MultiAgentFlow()
result = flow.kickoff()
assert result is not None
assert isinstance(result, LiteAgentOutput)
@pytest.mark.vcr()
def test_lite_agent_kickoff_async_inside_flow():
"""Test that Agent.kickoff_async() works correctly from async Flow methods."""
class AsyncAgentFlow(Flow):
@start()
async def async_agent_step(self):
agent = Agent(
role="Async Test Agent",
goal="Answer questions asynchronously",
backstory="An async helper",
llm=LLM(model="gpt-4o-mini"),
verbose=False,
)
result = await agent.kickoff_async(messages="What is 3+3?")
return result
flow = AsyncAgentFlow()
result = flow.kickoff()
assert result is not None
assert isinstance(result, LiteAgentOutput)
@pytest.mark.vcr()
def test_lite_agent_standalone_still_works():
"""Test that LiteAgent.kickoff() still works normally outside of a Flow.
This verifies that the magic auto-async pattern doesn't break standalone usage
where there's no event loop running.
"""
agent = Agent(
role="Standalone Agent",
goal="Answer questions",
backstory="A helpful assistant",
llm=LLM(model="gpt-4o-mini"),
verbose=False,
)
# This should work normally - no Flow, no event loop
result = agent.kickoff(messages="What is 5+5? Reply with just the number.")
assert result is not None
assert isinstance(result, LiteAgentOutput)
assert result.raw is not None