Compare commits

...

3 Commits

Author SHA1 Message Date
Devin AI
c5d438402f fix: use self.tools instead of self.original_tools for no-tools routing
Address Bugbot concern: self.tools includes internal tools (delegation,
human input) while self.original_tools only has user-defined tools.
Only route to native_no_tools when there are truly no tools at all,
so agents with internal tools still use the ReAct loop.

Add test for FC+internal-tools scenario.

Co-Authored-By: João <joao@crewai.com>
2026-03-04 12:37:50 +00:00
Devin AI
7e60321945 fix: address review comments - unused import, FC+no-tools routing
- Remove unused asyncio import from test file
- Route FC-capable LLMs with no tools + response_model to
  _invoke_loop_native_no_tools (preserves structured output)
- FC-capable LLMs with no tools and no response_model still
  fall through to ReAct path (no regression)
- Add tests for both FC+no-tools routing scenarios

Co-Authored-By: João <joao@crewai.com>
2026-03-04 12:32:30 +00:00
Devin AI
20be4ae62b fix: prevent response_model from being passed in ReAct flow when LLM lacks function calling
When an LLM does not support function calling (supports_function_calling()
returns False), the executor falls back to the ReAct text-based pattern.
Previously, response_model (set from task.output_pydantic) was still passed
to get_llm_response in the ReAct path, which caused InternalInstructor to
force structured output via instructor's TOOLS mode before the agent could
reason through Action/Observation cycles.

This fix sets response_model=None in both _invoke_loop_react and
_ainvoke_loop_react, allowing the ReAct loop to work normally. The output
schema is already embedded in the prompt text for guidance, and the final
conversion to pydantic/json happens in task._export_output() after the
agent finishes.

Fixes #4695

Co-Authored-By: João <joao@crewai.com>
2026-03-04 12:24:12 +00:00
2 changed files with 555 additions and 73 deletions

View File

@@ -311,16 +311,22 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
Final answer from the agent.
"""
# Check if model supports native function calling
use_native_tools = (
supports_fc = (
hasattr(self.llm, "supports_function_calling")
and callable(getattr(self.llm, "supports_function_calling", None))
and self.llm.supports_function_calling()
and self.original_tools
)
if use_native_tools:
if supports_fc and self.original_tools:
return self._invoke_loop_native_tools()
# FC-capable LLM with no user-defined tools but with response_model
# and no internal tools (delegation, human input, etc.): use simple
# native call path which correctly passes response_model for structured
# output instead of dropping it in the ReAct path.
if supports_fc and not self.tools and self.response_model:
return self._invoke_loop_native_no_tools()
# Fall back to ReAct text-based pattern
return self._invoke_loop_react()
@@ -351,6 +357,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
enforce_rpm_limit(self.request_within_rpm_limit)
# In the ReAct flow, do NOT pass response_model to the LLM call.
# When the LLM doesn't support function calling, passing response_model
# forces structured output (via instructor/tools mode) before the agent
# can reason through the Action/Observation loop. The output schema is
# already embedded in the prompt text for guidance, and the final
# conversion to pydantic/json happens in task._export_output().
# See: https://github.com/crewAIInc/crewAI/issues/4695
answer = get_llm_response(
llm=self.llm,
messages=self.messages,
@@ -358,43 +371,16 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
printer=self._printer,
from_task=self.task,
from_agent=self.agent,
response_model=self.response_model,
response_model=None,
executor_context=self,
verbose=self.agent.verbose,
)
# breakpoint()
if self.response_model is not None:
try:
if isinstance(answer, BaseModel):
output_json = answer.model_dump_json()
formatted_answer = AgentFinish(
thought="",
output=answer,
text=output_json,
)
else:
self.response_model.model_validate_json(answer)
formatted_answer = AgentFinish(
thought="",
output=answer,
text=answer,
)
except ValidationError:
# If validation fails, convert BaseModel to JSON string for parsing
answer_str = (
answer.model_dump_json()
if isinstance(answer, BaseModel)
else str(answer)
)
formatted_answer = process_llm_response(
answer_str, self.use_stop_words
) # type: ignore[assignment]
else:
# When no response_model, answer should be a string
answer_str = str(answer) if not isinstance(answer, str) else answer
formatted_answer = process_llm_response(
answer_str, self.use_stop_words
) # type: ignore[assignment]
# When no response_model is passed, answer should be a string
answer_str = str(answer) if not isinstance(answer, str) else answer
formatted_answer = process_llm_response(
answer_str, self.use_stop_words
) # type: ignore[assignment]
if isinstance(formatted_answer, AgentAction):
# Extract agent fingerprint if available
@@ -1152,16 +1138,22 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
Final answer from the agent.
"""
# Check if model supports native function calling
use_native_tools = (
supports_fc = (
hasattr(self.llm, "supports_function_calling")
and callable(getattr(self.llm, "supports_function_calling", None))
and self.llm.supports_function_calling()
and self.original_tools
)
if use_native_tools:
if supports_fc and self.original_tools:
return await self._ainvoke_loop_native_tools()
# FC-capable LLM with no user-defined tools but with response_model
# and no internal tools (delegation, human input, etc.): use simple
# native call path which correctly passes response_model for structured
# output instead of dropping it in the ReAct path.
if supports_fc and not self.tools and self.response_model:
return await self._ainvoke_loop_native_no_tools()
# Fall back to ReAct text-based pattern
return await self._ainvoke_loop_react()
@@ -1188,6 +1180,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
enforce_rpm_limit(self.request_within_rpm_limit)
# In the ReAct flow, do NOT pass response_model to the LLM call.
# When the LLM doesn't support function calling, passing response_model
# forces structured output (via instructor/tools mode) before the agent
# can reason through the Action/Observation loop. The output schema is
# already embedded in the prompt text for guidance, and the final
# conversion to pydantic/json happens in task._export_output().
# See: https://github.com/crewAIInc/crewAI/issues/4695
answer = await aget_llm_response(
llm=self.llm,
messages=self.messages,
@@ -1195,43 +1194,16 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
printer=self._printer,
from_task=self.task,
from_agent=self.agent,
response_model=self.response_model,
response_model=None,
executor_context=self,
verbose=self.agent.verbose,
)
if self.response_model is not None:
try:
if isinstance(answer, BaseModel):
output_json = answer.model_dump_json()
formatted_answer = AgentFinish(
thought="",
output=answer,
text=output_json,
)
else:
self.response_model.model_validate_json(answer)
formatted_answer = AgentFinish(
thought="",
output=answer,
text=answer,
)
except ValidationError:
# If validation fails, convert BaseModel to JSON string for parsing
answer_str = (
answer.model_dump_json()
if isinstance(answer, BaseModel)
else str(answer)
)
formatted_answer = process_llm_response(
answer_str, self.use_stop_words
) # type: ignore[assignment]
else:
# When no response_model, answer should be a string
answer_str = str(answer) if not isinstance(answer, str) else answer
formatted_answer = process_llm_response(
answer_str, self.use_stop_words
) # type: ignore[assignment]
# When no response_model is passed, answer should be a string
answer_str = str(answer) if not isinstance(answer, str) else answer
formatted_answer = process_llm_response(
answer_str, self.use_stop_words
) # type: ignore[assignment]
if isinstance(formatted_answer, AgentAction):
fingerprint_context = {}

View File

@@ -0,0 +1,510 @@
"""Tests for output_pydantic behavior in ReAct flow when LLM doesn't support function calling.
Regression tests for https://github.com/crewAIInc/crewAI/issues/4695
When an LLM does NOT support function calling (supports_function_calling() returns False),
the executor should use the ReAct text-based pattern. In this path, response_model should
NOT be passed to the LLM call, because doing so forces structured output (via instructor/
tools mode) before the agent can reason through the Action/Observation loop.
The schema should still be embedded in the prompt text for guidance, and the final
conversion to pydantic/json should happen in task._export_output() after the ReAct loop.
"""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from pydantic import BaseModel, Field
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentFinish
# ---------------------------------------------------------------------------
# Pydantic models used as output_pydantic in tests
# ---------------------------------------------------------------------------
# Two-field schema used as the ``response_model`` / ``output_pydantic`` target
# in the tests of this module.
# NOTE: the class docstring is left untouched on purpose; pydantic exposes a
# model's docstring as the schema description, so it is not purely a comment.
class PersonInfo(BaseModel):
    """A simple pydantic model for testing output_pydantic."""

    # Required string field.
    name: str = Field(description="Person's name")
    # Required integer field.
    age: int = Field(description="Person's age")
# Second, three-field schema so the tests in this module do not all rely on a
# single model shape.
# NOTE: the class docstring is left untouched on purpose; pydantic exposes a
# model's docstring as the schema description, so it is not purely a comment.
class WeatherReport(BaseModel):
    """Another pydantic model for testing output_pydantic."""

    # Required string field.
    city: str = Field(description="City name")
    # Required float field; the description states the unit (Fahrenheit).
    temperature: float = Field(description="Temperature in Fahrenheit")
    # Required string field.
    condition: str = Field(description="Weather condition")
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
def _make_llm(*, supports_fc: bool) -> MagicMock:
"""Create a mock LLM with configurable function-calling support."""
llm = MagicMock()
llm.supports_function_calling.return_value = supports_fc
llm.supports_stop_words.return_value = True
llm.stop = []
return llm
def _make_executor(
    llm: MagicMock,
    *,
    response_model: type[BaseModel] | None = None,
) -> CrewAgentExecutor:
    """Assemble a ``CrewAgentExecutor`` around mocked collaborators.

    The agent, task, crew and tools handler are all ``MagicMock`` objects; the
    executor is created with an empty tool list and a five-iteration cap.

    Args:
        llm: The (mock) LLM the executor should drive.
        response_model: Optional pydantic model forwarded to the executor as
            its ``response_model``.

    Returns:
        The freshly constructed executor.
    """
    # Attributes are supplied through the MagicMock constructor, which sets
    # them exactly as individual assignments would.
    agent_stub = MagicMock(
        role="Test Agent",
        key="test_agent_key",
        verbose=False,
        id="test_agent_id",
    )
    task_stub = MagicMock(description="Test task")
    crew_stub = MagicMock(verbose=False, _train=False)

    return CrewAgentExecutor(
        llm=llm,
        task=task_stub,
        crew=crew_stub,
        agent=agent_stub,
        prompt={"prompt": "Test prompt {input} {tool_names} {tools}"},
        max_iter=5,
        tools=[],
        tools_names="",
        stop_words=["Observation:"],
        tools_description="",
        tools_handler=MagicMock(),
        response_model=response_model,
    )
# ===========================================================================
# Sync tests
# ===========================================================================
class TestReActFlowDoesNotPassResponseModel:
    """Sync executor tests: the ReAct LLM call omits ``response_model`` and
    ``_invoke_loop`` dispatches to the expected loop implementation."""

    def test_react_flow_passes_none_response_model_when_output_pydantic_set(
        self,
    ) -> None:
        """A non-FC LLM plus a response_model must still yield
        ``response_model=None`` in the ``get_llm_response`` call."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: I know the answer\nFinal Answer: John is 30 years old",
        ) as mock_get_llm, patch.object(executor, "_show_logs"):
            result = executor._invoke_loop()

        # Core check: the ReAct path must not forward the response model.
        call_kwargs = mock_get_llm.call_args
        assert call_kwargs.kwargs.get("response_model") is None, (
            "response_model should be None in ReAct flow, but got "
            f"{call_kwargs.kwargs.get('response_model')}"
        )
        assert isinstance(result, AgentFinish)

    def test_react_flow_does_not_use_instructor_for_non_fc_llm(self) -> None:
        """``InternalInstructor`` must never be instantiated on the ReAct path."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=WeatherReport
        )

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: I found the weather\nFinal Answer: It is sunny in NYC at 72F",
        ), patch.object(executor, "_show_logs"), patch(
            "crewai.utilities.internal_instructor.InternalInstructor"
        ) as mock_instructor:
            executor._invoke_loop()

        mock_instructor.assert_not_called()

    def test_invoke_loop_routes_to_react_when_no_function_calling(self) -> None:
        """Without function-calling support, ``_invoke_loop`` must call
        ``_invoke_loop_react`` and leave the native-tools loop alone."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )
        finished = AgentFinish(
            thought="done", output="test", text="Final Answer: test"
        )

        with patch.object(
            executor, "_invoke_loop_react", return_value=finished
        ) as mock_react, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as mock_native:
            executor._invoke_loop()

        mock_react.assert_called_once()
        mock_native.assert_not_called()

    def test_invoke_loop_routes_to_native_when_function_calling_supported(
        self,
    ) -> None:
        """With function-calling support AND at least one original tool,
        ``_invoke_loop`` must call ``_invoke_loop_native_tools``."""
        executor = _make_executor(
            _make_llm(supports_fc=True), response_model=PersonInfo
        )
        # The native path is only taken when a user-defined tool exists.
        executor.original_tools = [MagicMock()]
        finished = AgentFinish(
            thought="done", output="test", text="Final Answer: test"
        )

        with patch.object(
            executor, "_invoke_loop_native_tools", return_value=finished
        ) as mock_native, patch.object(
            executor, "_invoke_loop_react"
        ) as mock_react:
            executor._invoke_loop()

        mock_native.assert_called_once()
        mock_react.assert_not_called()

    def test_invoke_loop_routes_to_native_no_tools_when_fc_no_tools_with_response_model(
        self,
    ) -> None:
        """FC-capable LLM, no tools of any kind, response_model present:
        ``_invoke_loop`` must call ``_invoke_loop_native_no_tools``."""
        executor = _make_executor(
            _make_llm(supports_fc=True), response_model=PersonInfo
        )
        # Neither user-defined nor internal tools.
        executor.original_tools = []
        executor.tools = []
        finished = AgentFinish(
            thought="done", output="test", text="Final Answer: test"
        )

        with patch.object(
            executor, "_invoke_loop_native_no_tools", return_value=finished
        ) as mock_native_no_tools, patch.object(
            executor, "_invoke_loop_react"
        ) as mock_react, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as mock_native:
            executor._invoke_loop()

        mock_native_no_tools.assert_called_once()
        mock_react.assert_not_called()
        mock_native.assert_not_called()

    def test_invoke_loop_routes_to_react_when_fc_no_orig_tools_but_internal_tools(
        self,
    ) -> None:
        """FC-capable LLM, empty ``original_tools`` but non-empty ``tools``
        (e.g. a delegation tool): ``_invoke_loop`` must fall through to
        ``_invoke_loop_react`` even though a response_model is set."""
        executor = _make_executor(
            _make_llm(supports_fc=True), response_model=PersonInfo
        )
        executor.original_tools = []
        # Stands in for an internal tool such as delegation.
        executor.tools = [MagicMock()]
        finished = AgentFinish(
            thought="done", output="test", text="Final Answer: test"
        )

        with patch.object(
            executor, "_invoke_loop_react", return_value=finished
        ) as mock_react, patch.object(
            executor, "_invoke_loop_native_no_tools"
        ) as mock_native_no_tools, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as mock_native:
            executor._invoke_loop()

        mock_react.assert_called_once()
        mock_native_no_tools.assert_not_called()
        mock_native.assert_not_called()

    def test_invoke_loop_routes_to_react_when_fc_no_tools_no_response_model(
        self,
    ) -> None:
        """FC-capable LLM, no tools and no response_model: ``_invoke_loop``
        must fall through to ``_invoke_loop_react``."""
        executor = _make_executor(_make_llm(supports_fc=True), response_model=None)
        executor.original_tools = []
        executor.tools = []
        finished = AgentFinish(
            thought="done", output="test", text="Final Answer: test"
        )

        with patch.object(
            executor, "_invoke_loop_react", return_value=finished
        ) as mock_react, patch.object(
            executor, "_invoke_loop_native_no_tools"
        ) as mock_native_no_tools, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as mock_native:
            executor._invoke_loop()

        mock_react.assert_called_once()
        mock_native_no_tools.assert_not_called()
        mock_native.assert_not_called()

    def test_react_flow_still_works_with_tool_usage(self) -> None:
        """An Action step followed by a Final Answer step must complete in
        two LLM calls, each made with ``response_model=None``."""
        from crewai.agents.parser import AgentAction
        from crewai.tools.tool_types import ToolResult

        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )
        call_count = 0

        def mock_llm_response(*args: Any, **kwargs: Any) -> str:
            nonlocal call_count
            call_count += 1
            # Every single call must omit the response model.
            assert kwargs.get("response_model") is None, (
                f"response_model should be None in ReAct flow (call {call_count})"
            )
            if call_count > 1:
                return (
                    "Thought: I found the person info\n"
                    "Final Answer: John Doe is 30 years old"
                )
            # First call: ask for a tool.
            return (
                "Thought: I need to search for the person\n"
                "Action: search_tool\n"
                'Action Input: {"query": "John Doe"}'
            )

        handled_action = AgentAction(
            text="Tool result",
            tool="search_tool",
            tool_input='{"query": "John Doe"}',
            thought="Used tool",
            result="John Doe, age 30",
        )

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            side_effect=mock_llm_response,
        ), patch(
            "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
            return_value=ToolResult(result="John Doe, age 30", result_as_answer=False),
        ), patch.object(executor, "_show_logs"), patch.object(
            executor, "_handle_agent_action", return_value=handled_action
        ):
            result = executor._invoke_loop()

        assert isinstance(result, AgentFinish)
        assert call_count == 2, f"Expected 2 LLM calls, got {call_count}"

    def test_react_flow_without_response_model_unchanged(self) -> None:
        """With no response_model configured, the ReAct flow still finishes
        and the LLM call still carries ``response_model=None``."""
        executor = _make_executor(_make_llm(supports_fc=False), response_model=None)

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: Simple answer\nFinal Answer: Hello world",
        ) as mock_get_llm, patch.object(executor, "_show_logs"):
            result = executor._invoke_loop()

        call_kwargs = mock_get_llm.call_args
        assert call_kwargs.kwargs.get("response_model") is None
        assert isinstance(result, AgentFinish)
# ===========================================================================
# Async tests
# ===========================================================================
class TestAsyncReActFlowDoesNotPassResponseModel:
    """Async executor tests: ``_ainvoke_loop`` must route non-FC LLMs to the
    ReAct loop, and that loop must call the LLM with ``response_model=None``."""

    @pytest.mark.asyncio
    async def test_async_react_flow_passes_none_response_model(self) -> None:
        """A non-FC LLM plus a response_model must still yield
        ``response_model=None`` in the ``aget_llm_response`` call."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )

        with patch(
            "crewai.agents.crew_agent_executor.aget_llm_response",
            new_callable=AsyncMock,
            return_value="Thought: I know\nFinal Answer: John is 30",
        ) as mock_aget_llm, patch.object(executor, "_show_logs"):
            result = await executor._ainvoke_loop()

        call_kwargs = mock_aget_llm.call_args
        assert call_kwargs.kwargs.get("response_model") is None, (
            "response_model should be None in async ReAct flow"
        )
        assert isinstance(result, AgentFinish)

    @pytest.mark.asyncio
    async def test_async_invoke_loop_routes_to_react_when_no_fc(self) -> None:
        """Without function-calling support, ``_ainvoke_loop`` must await
        ``_ainvoke_loop_react`` and leave the native-tools loop alone."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=WeatherReport
        )
        finished = AgentFinish(
            thought="done", output="test", text="Final Answer: test"
        )

        with patch.object(
            executor,
            "_ainvoke_loop_react",
            new_callable=AsyncMock,
            return_value=finished,
        ) as mock_react, patch.object(
            executor, "_ainvoke_loop_native_tools"
        ) as mock_native:
            await executor._ainvoke_loop()

        # assert_called_once() would also pass if the coroutine were created
        # but never awaited; for an AsyncMock the await itself is what proves
        # the ReAct loop actually ran.
        mock_react.assert_awaited_once()
        mock_native.assert_not_called()

    @pytest.mark.asyncio
    async def test_async_react_flow_with_tool_usage(self) -> None:
        """An Action step followed by a Final Answer step must complete in
        two LLM calls, each made with ``response_model=None``."""
        from crewai.agents.parser import AgentAction
        from crewai.tools.tool_types import ToolResult

        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )
        call_count = 0

        async def mock_llm_response(*args: Any, **kwargs: Any) -> str:
            nonlocal call_count
            call_count += 1
            # Every single call must omit the response model.
            assert kwargs.get("response_model") is None
            if call_count > 1:
                return "Thought: Done\nFinal Answer: Result found"
            # First call: ask for a tool.
            return (
                "Thought: I need to search\n"
                "Action: search_tool\n"
                'Action Input: {"query": "test"}'
            )

        handled_action = AgentAction(
            text="Tool result",
            tool="search_tool",
            tool_input='{"query": "test"}',
            thought="Searching",
            result="Found it",
        )

        with patch(
            "crewai.agents.crew_agent_executor.aget_llm_response",
            new_callable=AsyncMock,
            side_effect=mock_llm_response,
        ), patch(
            "crewai.agents.crew_agent_executor.aexecute_tool_and_check_finality",
            new_callable=AsyncMock,
            return_value=ToolResult(result="Found it", result_as_answer=False),
        ), patch.object(executor, "_show_logs"), patch.object(
            executor, "_handle_agent_action", return_value=handled_action
        ):
            result = await executor._ainvoke_loop()

        assert isinstance(result, AgentFinish)
        assert call_count == 2
# ===========================================================================
# Integration-style tests (Crew-level)
# ===========================================================================
class TestCrewLevelOutputPydanticWithNonFCModel:
    """Crew-level checks: a single-agent Crew whose task sets
    ``output_pydantic=PersonInfo`` is kicked off against a mocked LLM."""

    @staticmethod
    def _kickoff_person_crew(llm: MagicMock, description: str) -> Any:
        """Build a one-agent, one-task Crew around ``llm`` and run it.

        Args:
            llm: Mock LLM handed to the agent.
            description: Task description text.

        Returns:
            Whatever ``Crew.kickoff()`` returns.
        """
        from crewai import Agent, Crew, Task

        # Patch create_llm so Agent.__init__ doesn't try to instantiate a real LLM
        # NOTE(review): only Agent construction is kept inside the patch, per
        # the comment above -- confirm that kickoff does not also rely on it.
        with patch("crewai.agent.core.create_llm", return_value=llm):
            agent = Agent(
                role="Researcher",
                goal="Find person info",
                backstory="You research people.",
                llm=llm,
                verbose=False,
            )

        task = Task(
            description=description,
            expected_output="Person info as JSON",
            agent=agent,
            output_pydantic=PersonInfo,
        )
        return Crew(agents=[agent], tasks=[task], verbose=False).kickoff()

    def test_crew_output_pydantic_with_non_fc_llm_uses_react(self) -> None:
        """With a non-FC LLM, ``llm.call`` must be invoked and no invocation
        may carry a ``response_model`` keyword other than ``None``."""
        llm = _make_llm(supports_fc=False)
        llm.model = "ollama/llama3"
        # A well-formed ReAct final answer.
        llm.call.return_value = (
            "Thought: I know the answer\n"
            'Final Answer: {"name": "John Doe", "age": 30}'
        )

        result = self._kickoff_person_crew(llm, "Find info about John Doe")

        # The LLM must have been used at all ...
        assert llm.call.called
        # ... and never with a response model attached.
        for call_args in llm.call.call_args_list:
            rm = call_args.kwargs.get("response_model")
            assert rm is None, (
                f"response_model should be None for non-FC LLM, got {rm}"
            )
        assert result is not None

    def test_crew_output_pydantic_with_fc_llm_uses_native_tools(self) -> None:
        """With an FC-capable LLM the Crew run must still produce a result.

        Only the presence of a result is asserted; which executor loop was
        taken is not checked here.
        """
        llm = _make_llm(supports_fc=True)
        llm.model = "gpt-4o-mini"
        # A plain final answer with no tool calls.
        llm.call.return_value = '{"name": "Jane Doe", "age": 25}'

        result = self._kickoff_person_crew(llm, "Find info about Jane Doe")

        assert result is not None