mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-07 10:12:38 +00:00
The two parallel agent-executor implementations (CrewAgentExecutor and
the experimental AgentExecutor) had drifted enough that the Agent class
silently routed kickoff() and execute_task() to different executors:
* Agent.execute_task() instantiated self.executor_class (default
CrewAgentExecutor).
* Agent.kickoff() hard-coded AgentExecutor, ignoring executor_class.
* The reasoning-gate guard 'self.executor_class is not AgentExecutor'
coupled feature behavior to literal class identity, so a subclass of
either executor that opted into / out of internal planning was
handled incorrectly.
This patch introduces two ClassVar capability flags on
BaseAgentExecutor:
* supports_internal_planning - whether the executor runs its own
planning/replanning loop (and therefore should NOT have the external
handle_reasoning step run before task execution).
* supports_kickoff - whether the executor implements the Flow-based
kickoff entry point.
AgentExecutor sets both to True; CrewAgentExecutor leaves both False.
The Agent class now consults these flags instead of testing class
identity:
* The reasoning gate is 'not getattr(executor_class,
supports_internal_planning, False)'.
* Agent.kickoff() routes through _resolve_kickoff_executor_class()
which honors executor_class when it advertises supports_kickoff and
otherwise emits a DeprecationWarning before falling back to
AgentExecutor for backwards compatibility.
A new test module
lib/crewai/tests/agents/test_executor_consistency.py pins down the
contract.
Co-Authored-By: João <joao@crewai.com>
350 lines
14 KiB
Python
350 lines
14 KiB
Python
"""Regression tests for issue #5736: parallel agent executors must be consistent.
|
|
|
|
The repository has historically carried two parallel ``BaseAgentExecutor``
|
|
subclasses — :class:`crewai.agents.crew_agent_executor.CrewAgentExecutor` and
|
|
:class:`crewai.experimental.agent_executor.AgentExecutor`. The issue reported
|
|
two concrete bugs caused by the split:
|
|
|
|
1. ``Agent.kickoff()`` silently ignored ``Agent.executor_class`` and always
|
|
instantiated ``AgentExecutor``, so the user-visible default
|
|
(``CrewAgentExecutor``) did not match the executor that ``kickoff()``
|
|
actually used.
|
|
2. The ``handle_reasoning()`` step was gated on a literal class identity check
|
|
(``self.executor_class is not AgentExecutor``), so reasoning was silently
|
|
skipped depending on the executor and could not be controlled via a
|
|
capability flag.
|
|
|
|
These tests pin down the new contract:
|
|
|
|
* Capability flags (``supports_internal_planning`` / ``supports_kickoff``) are
|
|
the single source of truth for executor differences.
|
|
* ``Agent.kickoff()`` honors ``executor_class`` when it is kickoff-capable and
|
|
otherwise emits a ``DeprecationWarning`` instead of silently overriding.
|
|
* ``Agent.execute_task()`` instantiates ``self.executor_class`` (not a
|
|
hard-coded class).
|
|
* ``handle_reasoning()`` is gated on the capability flag, not class identity.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, ClassVar, Literal
|
|
import warnings
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from crewai import Agent, Task
|
|
from crewai.agents.agent_builder.base_agent_executor import BaseAgentExecutor
|
|
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
|
from crewai.experimental.agent_executor import AgentExecutor
|
|
from crewai.llms.base_llm import BaseLLM
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_llm() -> MagicMock:
|
|
"""Create a mock LLM that satisfies BaseLLM's protocol."""
|
|
llm = MagicMock(spec=BaseLLM)
|
|
llm.supports_stop_words.return_value = True
|
|
llm.stop = []
|
|
llm.model = "gpt-4o-mini"
|
|
return llm
|
|
|
|
|
|
@pytest.fixture
|
|
def test_agent(mock_llm: MagicMock) -> Agent:
|
|
"""A plain Agent that uses default ``executor_class``."""
|
|
return Agent(
|
|
role="Test Agent",
|
|
goal="Verify executor consistency",
|
|
backstory="An agent used by issue #5736 regression tests.",
|
|
llm=mock_llm,
|
|
verbose=False,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Capability flags
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExecutorCapabilityFlags:
|
|
"""``supports_internal_planning`` / ``supports_kickoff`` advertise the
|
|
differences between executors so callers don't have to ``isinstance``-check
|
|
against a specific class."""
|
|
|
|
def test_base_executor_has_no_capabilities(self) -> None:
|
|
assert BaseAgentExecutor.supports_internal_planning is False
|
|
assert BaseAgentExecutor.supports_kickoff is False
|
|
|
|
def test_crew_agent_executor_has_no_capabilities(self) -> None:
|
|
"""The legacy executor neither plans internally nor handles kickoff."""
|
|
assert CrewAgentExecutor.supports_internal_planning is False
|
|
assert CrewAgentExecutor.supports_kickoff is False
|
|
|
|
def test_agent_executor_advertises_full_capabilities(self) -> None:
|
|
"""The Flow-based executor implements both plan-and-execute and kickoff."""
|
|
assert AgentExecutor.supports_internal_planning is True
|
|
assert AgentExecutor.supports_kickoff is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Agent.kickoff() honors executor_class
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestKickoffRespectsExecutorClass:
|
|
"""Issue #5736 part 1: ``Agent.kickoff()`` must honor ``executor_class``."""
|
|
|
|
def test_kickoff_uses_explicit_agent_executor_without_warning(
|
|
self, mock_llm: MagicMock
|
|
) -> None:
|
|
"""When the user explicitly opts into ``AgentExecutor``, no warning fires."""
|
|
agent = Agent(
|
|
role="r",
|
|
goal="g",
|
|
backstory="b",
|
|
llm=mock_llm,
|
|
executor_class=AgentExecutor,
|
|
)
|
|
with warnings.catch_warnings(record=True) as caught:
|
|
warnings.simplefilter("always")
|
|
executor_cls = agent._resolve_kickoff_executor_class()
|
|
|
|
assert executor_cls is AgentExecutor
|
|
kickoff_warnings = [
|
|
w for w in caught if "Agent.kickoff()" in str(w.message)
|
|
]
|
|
assert kickoff_warnings == [], (
|
|
f"Did not expect any kickoff warning, got: {[str(w.message) for w in kickoff_warnings]}"
|
|
)
|
|
|
|
def test_kickoff_uses_kickoff_capable_subclass_of_agent_executor(
|
|
self, mock_llm: MagicMock
|
|
) -> None:
|
|
"""A user-defined subclass that opts into kickoff is honored verbatim
|
|
(rather than being silently replaced with ``AgentExecutor``)."""
|
|
|
|
class CustomAgentExecutor(AgentExecutor):
|
|
executor_type: Literal["custom"] = "custom" # type: ignore[assignment]
|
|
supports_kickoff: ClassVar[bool] = True
|
|
supports_internal_planning: ClassVar[bool] = True
|
|
|
|
agent = Agent(
|
|
role="r",
|
|
goal="g",
|
|
backstory="b",
|
|
llm=mock_llm,
|
|
executor_class=CustomAgentExecutor,
|
|
)
|
|
|
|
with warnings.catch_warnings(record=True) as caught:
|
|
warnings.simplefilter("always")
|
|
executor_cls = agent._resolve_kickoff_executor_class()
|
|
|
|
assert executor_cls is CustomAgentExecutor
|
|
kickoff_warnings = [
|
|
w for w in caught if "Agent.kickoff()" in str(w.message)
|
|
]
|
|
assert kickoff_warnings == []
|
|
|
|
def test_kickoff_warns_and_falls_back_when_executor_class_is_not_kickoff_capable(
|
|
self, mock_llm: MagicMock
|
|
) -> None:
|
|
"""Default-configured agents (``executor_class=CrewAgentExecutor``) used
|
|
to silently flip to ``AgentExecutor`` on ``kickoff()``. The fix keeps
|
|
the fallback for backwards compatibility but emits a
|
|
``DeprecationWarning`` so the silent override is now visible."""
|
|
agent = Agent(
|
|
role="r",
|
|
goal="g",
|
|
backstory="b",
|
|
llm=mock_llm,
|
|
executor_class=CrewAgentExecutor, # the default
|
|
)
|
|
|
|
with pytest.warns(DeprecationWarning, match=r"Agent\.kickoff\(\)"):
|
|
executor_cls = agent._resolve_kickoff_executor_class()
|
|
|
|
assert executor_cls is AgentExecutor
|
|
|
|
def test_kickoff_fallback_warning_mentions_explicit_opt_in(
|
|
self, mock_llm: MagicMock
|
|
) -> None:
|
|
"""The deprecation message should tell users how to silence it — by
|
|
setting ``executor_class=AgentExecutor`` explicitly."""
|
|
agent = Agent(
|
|
role="r",
|
|
goal="g",
|
|
backstory="b",
|
|
llm=mock_llm,
|
|
executor_class=CrewAgentExecutor,
|
|
)
|
|
|
|
with pytest.warns(DeprecationWarning) as record:
|
|
agent._resolve_kickoff_executor_class()
|
|
|
|
assert any(
|
|
"executor_class=AgentExecutor" in str(w.message) for w in record
|
|
)
|
|
|
|
def test_kickoff_constructs_executor_via_resolve_helper(
|
|
self, test_agent: Agent
|
|
) -> None:
|
|
"""``_prepare_kickoff()`` must route through ``_resolve_kickoff_executor_class``
|
|
rather than instantiating ``AgentExecutor`` directly. This guards against
|
|
regressing back to the issue #5736 hardcoded-class behavior."""
|
|
sentinel = MagicMock(name="sentinel_executor_cls")
|
|
sentinel.return_value = MagicMock(name="sentinel_executor")
|
|
|
|
with patch.object(
|
|
test_agent,
|
|
"_resolve_kickoff_executor_class",
|
|
return_value=sentinel,
|
|
) as mock_resolve, patch.object(
|
|
test_agent, "_build_execution_prompt"
|
|
) as mock_build_prompt:
|
|
mock_build_prompt.return_value = (
|
|
{"system": "sys"},
|
|
["Observation:"],
|
|
None,
|
|
)
|
|
test_agent._prepare_kickoff(messages="hello")
|
|
|
|
mock_resolve.assert_called_once_with()
|
|
sentinel.assert_called_once()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Agent.execute_task() honors executor_class (sanity check; pre-existing
|
|
# behavior we don't want to regress)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExecuteTaskRespectsExecutorClass:
|
|
"""``Agent.execute_task()`` already routed through ``self.executor_class``
|
|
— these tests pin that contract so future refactors keep both entry points
|
|
honoring the user's executor choice."""
|
|
|
|
def test_execute_task_instantiates_self_executor_class(
|
|
self, test_agent: Agent
|
|
) -> None:
|
|
"""The non-resuming branch of ``Agent.create_agent_executor`` should
|
|
call ``self.executor_class(...)``. Stubbing ``executor_class`` with a
|
|
factory lets us observe the instantiation without running the LLM
|
|
loop."""
|
|
captured: dict[str, Any] = {}
|
|
|
|
def fake_factory(*args: Any, **kwargs: Any) -> MagicMock:
|
|
captured["called"] = True
|
|
captured["kwargs"] = kwargs
|
|
return MagicMock(spec=CrewAgentExecutor)
|
|
|
|
task = Task(
|
|
description="d",
|
|
expected_output="e",
|
|
agent=test_agent,
|
|
)
|
|
test_agent.agent_executor = None
|
|
test_agent.executor_class = fake_factory # type: ignore[assignment]
|
|
|
|
with patch.object(test_agent, "_build_execution_prompt") as mock_bp:
|
|
mock_bp.return_value = ({"system": "s"}, ["Obs:"], None)
|
|
test_agent.create_agent_executor(task=task)
|
|
|
|
assert captured.get("called") is True
|
|
# The agent passed itself to its executor.
|
|
assert captured["kwargs"].get("agent") is test_agent
|
|
assert captured["kwargs"].get("task") is task
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# handle_reasoning gate: capability flag, not class identity
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestReasoningGateUsesCapabilityFlag:
|
|
"""Issue #5736 part 2: the reasoning gate must consult
|
|
``supports_internal_planning`` rather than testing class identity. This
|
|
way a ``CrewAgentExecutor`` subclass that opts into internal planning is
|
|
treated correctly — and so is an ``AgentExecutor`` subclass that doesn't."""
|
|
|
|
def test_reasoning_runs_for_executor_without_internal_planning(
|
|
self, test_agent: Agent
|
|
) -> None:
|
|
"""Default ``executor_class`` is ``CrewAgentExecutor`` which does NOT
|
|
plan internally, so the legacy ``handle_reasoning`` step must run."""
|
|
task = Task(description="d", expected_output="e", agent=test_agent)
|
|
|
|
assert test_agent.executor_class is CrewAgentExecutor
|
|
assert (
|
|
getattr(
|
|
test_agent.executor_class, "supports_internal_planning", False
|
|
)
|
|
is False
|
|
)
|
|
|
|
with patch(
|
|
"crewai.agent.core.handle_reasoning"
|
|
) as mock_handle_reasoning, patch.object(
|
|
test_agent, "_inject_date_to_task"
|
|
), patch.object(
|
|
test_agent, "_retrieve_memory_context", return_value="prompt"
|
|
):
|
|
test_agent._prepare_task_execution(task=task, context=None)
|
|
|
|
mock_handle_reasoning.assert_called_once_with(test_agent, task)
|
|
|
|
def test_reasoning_skipped_for_executor_with_internal_planning(
|
|
self, test_agent: Agent
|
|
) -> None:
|
|
"""``AgentExecutor.supports_internal_planning is True``, so the agent
|
|
should not invoke the external ``handle_reasoning`` step (the executor
|
|
runs its own ``generate_plan`` flow instead)."""
|
|
test_agent.executor_class = AgentExecutor # type: ignore[assignment]
|
|
task = Task(description="d", expected_output="e", agent=test_agent)
|
|
|
|
assert (
|
|
getattr(
|
|
test_agent.executor_class, "supports_internal_planning", False
|
|
)
|
|
is True
|
|
)
|
|
|
|
with patch(
|
|
"crewai.agent.core.handle_reasoning"
|
|
) as mock_handle_reasoning, patch.object(
|
|
test_agent, "_inject_date_to_task"
|
|
), patch.object(
|
|
test_agent, "_retrieve_memory_context", return_value="prompt"
|
|
):
|
|
test_agent._prepare_task_execution(task=task, context=None)
|
|
|
|
mock_handle_reasoning.assert_not_called()
|
|
|
|
def test_reasoning_gate_does_not_use_isinstance_or_class_identity(
|
|
self, test_agent: Agent
|
|
) -> None:
|
|
"""A ``CrewAgentExecutor`` subclass that opts into internal planning
|
|
(``supports_internal_planning = True``) must skip the legacy reasoning
|
|
step. This is the key behavioral promise of replacing the old
|
|
``self.executor_class is not AgentExecutor`` check with a flag-based
|
|
check."""
|
|
|
|
class PlanningCrewExecutor(CrewAgentExecutor):
|
|
executor_type: Literal["crew_planning"] = "crew_planning" # type: ignore[assignment]
|
|
supports_internal_planning: ClassVar[bool] = True
|
|
|
|
test_agent.executor_class = PlanningCrewExecutor # type: ignore[assignment]
|
|
task = Task(description="d", expected_output="e", agent=test_agent)
|
|
|
|
with patch(
|
|
"crewai.agent.core.handle_reasoning"
|
|
) as mock_handle_reasoning, patch.object(
|
|
test_agent, "_inject_date_to_task"
|
|
), patch.object(
|
|
test_agent, "_retrieve_memory_context", return_value="prompt"
|
|
):
|
|
test_agent._prepare_task_execution(task=task, context=None)
|
|
|
|
mock_handle_reasoning.assert_not_called()
|