# Mirror of https://github.com/crewAIInc/crewAI.git
# Synced 2026-05-05 01:02:37 +00:00
#
# Commit message: Address Bugbot concern: self.tools includes internal tools
# (delegation, human input) while self.original_tools only has user-defined
# tools. Only route to native_no_tools when there are truly no tools at all,
# so agents with internal tools still use the ReAct loop. Add test for
# FC+internal-tools scenario. Co-Authored-By: João <joao@crewai.com>
#
# 511 lines, 20 KiB, Python
"""Tests for output_pydantic behavior in ReAct flow when LLM doesn't support function calling.

Regression tests for https://github.com/crewAIInc/crewAI/issues/4695

When an LLM does NOT support function calling (supports_function_calling() returns False),
the executor should use the ReAct text-based pattern. In this path, response_model should
NOT be passed to the LLM call, because doing so forces structured output (via instructor/
tools mode) before the agent can reason through the Action/Observation loop.

The schema should still be embedded in the prompt text for guidance, and the final
conversion to pydantic/json should happen in task._export_output() after the ReAct loop.
"""
|
|
|
|
from __future__ import annotations

from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from pydantic import BaseModel, Field

from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentAction, AgentFinish
from crewai.tools.tool_types import ToolResult
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Pydantic models used as output_pydantic in tests
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class PersonInfo(BaseModel):
    """A simple pydantic model for testing output_pydantic."""

    # Two required fields; the tests in this module pass this class as the
    # executor's response_model and as a Task's output_pydantic.
    name: str = Field(description="Person's name")
    age: int = Field(description="Person's age")
|
|
|
|
|
|
class WeatherReport(BaseModel):
    """Another pydantic model for testing output_pydantic."""

    # Three required fields; used as an alternative response_model so the
    # tests are not tied to a single schema.
    city: str = Field(description="City name")
    temperature: float = Field(description="Temperature in Fahrenheit")
    condition: str = Field(description="Weather condition")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_llm(*, supports_fc: bool) -> MagicMock:
|
|
"""Create a mock LLM with configurable function-calling support."""
|
|
llm = MagicMock()
|
|
llm.supports_function_calling.return_value = supports_fc
|
|
llm.supports_stop_words.return_value = True
|
|
llm.stop = []
|
|
return llm
|
|
|
|
|
|
def _make_executor(
    llm: MagicMock,
    *,
    response_model: type[BaseModel] | None = None,
) -> CrewAgentExecutor:
    """Create a CrewAgentExecutor with the given LLM and response_model.

    The agent, task and crew collaborators are ``MagicMock`` objects with only
    the attributes set below; the executor is built with no tools, so tests
    that need tools assign ``original_tools`` / ``tools`` on the returned
    instance.

    Args:
        llm: Mock LLM, typically produced by ``_make_llm``.
        response_model: Optional pydantic model forwarded to the executor.

    Returns:
        The configured ``CrewAgentExecutor`` (``max_iter=5``, stop word
        ``"Observation:"``).
    """
    agent = MagicMock()
    agent.role = "Test Agent"
    agent.key = "test_agent_key"
    agent.verbose = False
    agent.id = "test_agent_id"

    task = MagicMock()
    task.description = "Test task"

    crew = MagicMock()
    crew.verbose = False
    crew._train = False

    return CrewAgentExecutor(
        llm=llm,
        task=task,
        crew=crew,
        agent=agent,
        prompt={"prompt": "Test prompt {input} {tool_names} {tools}"},
        max_iter=5,
        tools=[],
        tools_names="",
        stop_words=["Observation:"],
        tools_description="",
        tools_handler=MagicMock(),
        response_model=response_model,
    )
|
|
|
|
|
|
# ===========================================================================
# Sync tests
# ===========================================================================
|
|
|
|
|
|
class TestReActFlowDoesNotPassResponseModel:
    """Verify that _invoke_loop_react does NOT pass response_model to LLM."""

    # ------------------------------------------------------------------
    # Private helpers (not collected by pytest: names do not start with test_)
    # ------------------------------------------------------------------

    @staticmethod
    def _assert_no_response_model(mock_get_llm: MagicMock) -> None:
        """Assert the patched ``get_llm_response`` ran and never got a response_model.

        Every recorded call is checked, not just the last one.
        """
        # Fail with a readable assertion instead of an AttributeError on a
        # ``None`` call_args when the LLM helper was never reached.
        mock_get_llm.assert_called()
        for recorded in mock_get_llm.call_args_list:
            response_model = recorded.kwargs.get("response_model")
            assert response_model is None, (
                "response_model should be None in ReAct flow, but got "
                f"{response_model}"
            )

    @staticmethod
    def _assert_routes_to(executor: CrewAgentExecutor, expected: str) -> None:
        """Call ``executor._invoke_loop()`` with every loop variant stubbed.

        Args:
            executor: Executor under test.
            expected: Name of the loop method that must be the only one invoked.
        """
        with patch.object(executor, "_invoke_loop_react") as react, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as native_tools, patch.object(
            executor, "_invoke_loop_native_no_tools"
        ) as native_no_tools:
            stubs = {
                "_invoke_loop_react": react,
                "_invoke_loop_native_tools": native_tools,
                "_invoke_loop_native_no_tools": native_no_tools,
            }
            stubs[expected].return_value = AgentFinish(
                thought="done", output="test", text="Final Answer: test"
            )
            executor._invoke_loop()

        for name, stub in stubs.items():
            if name == expected:
                stub.assert_called_once()
            else:
                stub.assert_not_called()

    # ------------------------------------------------------------------
    # response_model must not reach the LLM in the ReAct path
    # ------------------------------------------------------------------

    def test_react_flow_passes_none_response_model_when_output_pydantic_set(
        self,
    ) -> None:
        """When output_pydantic is set but LLM lacks function calling,
        response_model must be None in the get_llm_response call."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=PersonInfo)

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: I know the answer\nFinal Answer: John is 30 years old",
        ) as mock_get_llm, patch.object(executor, "_show_logs"):
            result = executor._invoke_loop()

        # The critical assertion: response_model must be None in ReAct flow
        self._assert_no_response_model(mock_get_llm)
        assert isinstance(result, AgentFinish)

    def test_react_flow_does_not_use_instructor_for_non_fc_llm(self) -> None:
        """Ensure InternalInstructor is never invoked in the ReAct path."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=WeatherReport)

        # NOTE(review): this patches InternalInstructor where it is defined.
        # Modules that imported the class by name before the patch keep the
        # real object, and get_llm_response is fully stubbed here, so this
        # check may be weaker than it looks - confirm the patch target.
        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: I found the weather\nFinal Answer: It is sunny in NYC at 72F",
        ), patch.object(executor, "_show_logs"), patch(
            "crewai.utilities.internal_instructor.InternalInstructor"
        ) as mock_instructor:
            executor._invoke_loop()

        mock_instructor.assert_not_called()

    # ------------------------------------------------------------------
    # Routing performed by _invoke_loop
    # ------------------------------------------------------------------

    def test_invoke_loop_routes_to_react_when_no_function_calling(self) -> None:
        """Confirm _invoke_loop routes to _invoke_loop_react when
        supports_function_calling() returns False."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=PersonInfo)

        self._assert_routes_to(executor, "_invoke_loop_react")

    def test_invoke_loop_routes_to_native_when_function_calling_supported(
        self,
    ) -> None:
        """Confirm _invoke_loop routes to _invoke_loop_native_tools when
        supports_function_calling() returns True AND tools are present."""
        llm = _make_llm(supports_fc=True)
        executor = _make_executor(llm, response_model=PersonInfo)
        # Need at least one tool for native path
        executor.original_tools = [MagicMock()]

        self._assert_routes_to(executor, "_invoke_loop_native_tools")

    def test_invoke_loop_routes_to_native_no_tools_when_fc_no_tools_with_response_model(
        self,
    ) -> None:
        """When LLM supports FC, has no tools (including internal tools),
        but HAS a response_model, route to _invoke_loop_native_no_tools
        (which correctly passes response_model for structured output)."""
        llm = _make_llm(supports_fc=True)
        executor = _make_executor(llm, response_model=PersonInfo)
        # No user-defined or internal tools
        executor.original_tools = []
        executor.tools = []

        self._assert_routes_to(executor, "_invoke_loop_native_no_tools")

    def test_invoke_loop_routes_to_react_when_fc_no_orig_tools_but_internal_tools(
        self,
    ) -> None:
        """When LLM supports FC, has no original_tools but HAS internal tools
        (e.g. delegation), fall through to ReAct even with response_model.
        Internal tools need the ReAct loop for Action/Observation cycles."""
        llm = _make_llm(supports_fc=True)
        executor = _make_executor(llm, response_model=PersonInfo)
        executor.original_tools = []
        # Internal tools present (e.g. delegation tool)
        executor.tools = [MagicMock()]

        self._assert_routes_to(executor, "_invoke_loop_react")

    def test_invoke_loop_routes_to_react_when_fc_no_tools_no_response_model(
        self,
    ) -> None:
        """When LLM supports FC, has no tools, and NO response_model,
        fall through to ReAct path (no structured output to preserve)."""
        llm = _make_llm(supports_fc=True)
        executor = _make_executor(llm, response_model=None)
        executor.original_tools = []
        executor.tools = []

        self._assert_routes_to(executor, "_invoke_loop_react")

    # ------------------------------------------------------------------
    # End-to-end ReAct loop behaviour
    # ------------------------------------------------------------------

    def test_react_flow_still_works_with_tool_usage(self) -> None:
        """Verify the ReAct loop still processes Action/Observation cycles
        correctly even when output_pydantic is set."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=PersonInfo)

        call_count = 0

        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
            nonlocal call_count
            call_count += 1
            # Verify response_model is None on every call
            assert kwargs.get("response_model") is None, (
                f"response_model should be None in ReAct flow (call {call_count})"
            )
            if call_count == 1:
                # First turn: ask for a tool so the loop has to iterate.
                return (
                    "Thought: I need to search for the person\n"
                    "Action: search_tool\n"
                    'Action Input: {"query": "John Doe"}'
                )
            return (
                "Thought: I found the person info\n"
                "Final Answer: John Doe is 30 years old"
            )

        handled_action = AgentAction(
            text="Tool result",
            tool="search_tool",
            tool_input='{"query": "John Doe"}',
            thought="Used tool",
            result="John Doe, age 30",
        )

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            side_effect=mock_llm_response,
        ), patch(
            "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
            return_value=ToolResult(result="John Doe, age 30", result_as_answer=False),
        ), patch.object(executor, "_show_logs"), patch.object(
            executor, "_handle_agent_action", return_value=handled_action
        ):
            result = executor._invoke_loop()

        assert isinstance(result, AgentFinish)
        assert call_count == 2, f"Expected 2 LLM calls, got {call_count}"

    def test_react_flow_without_response_model_unchanged(self) -> None:
        """Verify the ReAct flow still works normally when no response_model is set."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=None)

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: Simple answer\nFinal Answer: Hello world",
        ) as mock_get_llm, patch.object(executor, "_show_logs"):
            result = executor._invoke_loop()

        self._assert_no_response_model(mock_get_llm)
        assert isinstance(result, AgentFinish)
|
|
|
|
|
|
# ===========================================================================
# Async tests
# ===========================================================================
|
|
|
|
|
|
class TestAsyncReActFlowDoesNotPassResponseModel:
    """Verify that _ainvoke_loop_react does NOT pass response_model to LLM."""

    @pytest.mark.asyncio
    async def test_async_react_flow_passes_none_response_model(self) -> None:
        """Async variant: response_model must be None in ReAct flow."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=PersonInfo)

        with patch(
            "crewai.agents.crew_agent_executor.aget_llm_response",
            new_callable=AsyncMock,
            return_value="Thought: I know\nFinal Answer: John is 30",
        ) as mock_aget_llm, patch.object(executor, "_show_logs"):
            result = await executor._ainvoke_loop()

        # Fail with a readable assertion (rather than an AttributeError on a
        # ``None`` call_args) if the LLM helper was never reached, and check
        # every recorded call instead of only the last one.
        mock_aget_llm.assert_called()
        for recorded in mock_aget_llm.call_args_list:
            assert recorded.kwargs.get("response_model") is None, (
                "response_model should be None in async ReAct flow"
            )
        assert isinstance(result, AgentFinish)

    @pytest.mark.asyncio
    async def test_async_invoke_loop_routes_to_react_when_no_fc(self) -> None:
        """Async: _ainvoke_loop routes to _ainvoke_loop_react when
        supports_function_calling() returns False."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=WeatherReport)

        with patch.object(
            executor,
            "_ainvoke_loop_react",
            new_callable=AsyncMock,
            return_value=AgentFinish(thought="done", output="test", text="Final Answer: test"),
        ) as mock_react, patch.object(
            executor, "_ainvoke_loop_native_tools"
        ) as mock_native:
            await executor._ainvoke_loop()

        mock_react.assert_called_once()
        mock_native.assert_not_called()

    @pytest.mark.asyncio
    async def test_async_react_flow_with_tool_usage(self) -> None:
        """Async: ReAct loop processes tool calls correctly with output_pydantic."""
        llm = _make_llm(supports_fc=False)
        executor = _make_executor(llm, response_model=PersonInfo)

        call_count = 0

        async def mock_llm_response(*args: Any, **kwargs: Any) -> str:
            nonlocal call_count
            call_count += 1
            assert kwargs.get("response_model") is None
            if call_count == 1:
                # First turn: ask for a tool so the loop has to iterate.
                return (
                    "Thought: I need to search\n"
                    "Action: search_tool\n"
                    'Action Input: {"query": "test"}'
                )
            return "Thought: Done\nFinal Answer: Result found"

        handled_action = AgentAction(
            text="Tool result",
            tool="search_tool",
            tool_input='{"query": "test"}',
            thought="Searching",
            result="Found it",
        )

        with patch(
            "crewai.agents.crew_agent_executor.aget_llm_response",
            new_callable=AsyncMock,
            side_effect=mock_llm_response,
        ), patch(
            "crewai.agents.crew_agent_executor.aexecute_tool_and_check_finality",
            new_callable=AsyncMock,
            return_value=ToolResult(result="Found it", result_as_answer=False),
        ), patch.object(executor, "_show_logs"), patch.object(
            executor, "_handle_agent_action", return_value=handled_action
        ):
            result = await executor._ainvoke_loop()

        assert isinstance(result, AgentFinish)
        assert call_count == 2
|
|
|
|
|
|
# ===========================================================================
# Integration-style tests (Crew-level)
# ===========================================================================
|
|
|
|
|
|
class TestCrewLevelOutputPydanticWithNonFCModel:
    """Higher-level tests verifying that a Crew with output_pydantic works
    correctly when the LLM doesn't support function calling."""

    @staticmethod
    def _kickoff_person_crew(llm: MagicMock, description: str) -> Any:
        """Run a one-agent, one-task Crew whose task sets output_pydantic=PersonInfo.

        Args:
            llm: Mock LLM handed to the Agent.
            description: Task description.

        Returns:
            Whatever ``Crew.kickoff()`` returns.
        """
        from crewai import Agent, Crew, Task

        # Patch create_llm so Agent.__init__ doesn't try to instantiate a real LLM
        with patch("crewai.agent.core.create_llm", return_value=llm):
            agent = Agent(
                role="Researcher",
                goal="Find person info",
                backstory="You research people.",
                llm=llm,
                verbose=False,
            )

        task = Task(
            description=description,
            expected_output="Person info as JSON",
            agent=agent,
            output_pydantic=PersonInfo,
        )
        crew = Crew(agents=[agent], tasks=[task], verbose=False)
        return crew.kickoff()

    def test_crew_output_pydantic_with_non_fc_llm_uses_react(self) -> None:
        """A Crew with output_pydantic should still use ReAct flow and NOT
        pass response_model to the LLM when it doesn't support FC."""
        llm = _make_llm(supports_fc=False)
        llm.model = "ollama/llama3"
        # Return a valid ReAct final answer
        llm.call.return_value = (
            "Thought: I know the answer\n"
            'Final Answer: {"name": "John Doe", "age": 30}'
        )

        result = self._kickoff_person_crew(llm, "Find info about John Doe")

        # Verify llm.call was invoked
        assert llm.call.called

        # Verify response_model was NOT passed to llm.call
        for call_args in llm.call.call_args_list:
            rm = call_args.kwargs.get("response_model")
            assert rm is None, (
                f"response_model should be None for non-FC LLM, got {rm}"
            )

        assert result is not None

    def test_crew_output_pydantic_with_fc_llm_uses_native_tools(self) -> None:
        """A Crew with output_pydantic and an FC-capable LLM should complete;
        on this path response_model MAY be passed to the LLM, so it is not
        asserted to be None."""
        # NOTE(review): the agent has no user-defined tools, so this presumably
        # exercises the native "no tools" route rather than the native tools
        # loop named in the test - confirm against _invoke_loop's routing.
        llm = _make_llm(supports_fc=True)
        llm.model = "gpt-4o-mini"
        # Return a valid final answer (no tool calls)
        llm.call.return_value = '{"name": "Jane Doe", "age": 25}'

        result = self._kickoff_person_crew(llm, "Find info about Jane Doe")

        # The mock LLM must actually have been exercised; without this the
        # test would pass even if kickoff short-circuited before any LLM call.
        assert llm.call.called
        assert result is not None
|