Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-04-15 23:42:37 +00:00
fix: prevent response_model from leaking into tool-calling loop (#5472)
When output_pydantic / response_model is set on a task, the response_model was being
forwarded to the LLM on every iteration of the tool-calling loop. This causes many
non-OpenAI LLMs to skip tool calls entirely, because the tools and response_format
parameters conflict.

Fix: don't pass response_model to the LLM during tool-calling loop iterations when
the agent has tools. Structured output conversion is already handled as
post-processing via Task._export_output().

Changes:
- crew_agent_executor.py: _invoke_loop_react, _invoke_loop_native_tools,
  _ainvoke_loop_react, _ainvoke_loop_native_tools now pass response_model=None when
  tools are present
- experimental/agent_executor.py: call_llm_and_parse, call_llm_native_tools similarly
  updated
- Added regression tests in TestResponseModelNotLeakedDuringToolLoop

Co-Authored-By: João <joao@crewai.com>
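For context, a minimal sketch of the scenario this commit fixes, assuming crewAI's public Agent / Task / Crew API and the @tool decorator from crewai.tools; the model, tool, and field names below are illustrative, and an LLM provider still has to be configured separately. The setup is an agent that has tools combined with a task that sets output_pydantic: during the tool-calling loop the LLM should see only the tools, and the structured output should be built afterwards via Task._export_output().

from pydantic import BaseModel

from crewai import Agent, Crew, Task
from crewai.tools import tool


class CityTemp(BaseModel):
    city: str
    temperature_c: float


@tool("lookup_temperature")
def lookup_temperature(city: str) -> str:
    """Return the current temperature for a city (stubbed for illustration)."""
    return f"The temperature in {city} is 21 C"


researcher = Agent(
    role="Weather researcher",
    goal="Report the current temperature",
    backstory="You look up weather data with the tools you are given.",
    tools=[lookup_temperature],
)

task = Task(
    description="Find the current temperature in Lisbon",
    expected_output="A CityTemp object",
    output_pydantic=CityTemp,  # before this fix, also forwarded on every tool-loop LLM call
    agent=researcher,
)

result = Crew(agents=[researcher], tasks=[task]).kickoff()
print(result.pydantic)  # structured output produced after the tool loop completes

Before this change, the task's response model was forwarded on every loop iteration, which led several non-OpenAI providers to return a structured answer immediately instead of calling the tool.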
crew_agent_executor.py

@@ -334,6 +334,15 @@ class CrewAgentExecutor(BaseAgentExecutor):
 
             enforce_rpm_limit(self.request_within_rpm_limit)
 
+            # Don't pass response_model when tools are present to avoid
+            # conflicting with tool-calling — tools and response_format
+            # can't coexist on many LLM providers (see issue #5472).
+            # Structured output is applied as post-processing via
+            # Task._export_output() after the tool loop completes.
+            effective_response_model = (
+                None if self.original_tools else self.response_model
+            )
+
             answer = get_llm_response(
                 llm=cast("BaseLLM", self.llm),
                 messages=self.messages,
@@ -341,11 +350,11 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 printer=PRINTER,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=effective_response_model,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
-            if self.response_model is not None:
+            if effective_response_model is not None:
                 try:
                     if isinstance(answer, BaseModel):
                         output_json = answer.model_dump_json()
@@ -477,6 +486,10 @@ class CrewAgentExecutor(BaseAgentExecutor):
 
             enforce_rpm_limit(self.request_within_rpm_limit)
 
+            # Never pass response_model in the native tool-calling
+            # loop — tools and response_format can't coexist on many
+            # LLM providers (see issue #5472). Structured output is
+            # applied as post-processing via Task._export_output().
             answer = get_llm_response(
                 llm=cast("BaseLLM", self.llm),
                 messages=self.messages,
@@ -486,7 +499,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 available_functions=None,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=None,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
@@ -1142,6 +1155,12 @@ class CrewAgentExecutor(BaseAgentExecutor):
 
             enforce_rpm_limit(self.request_within_rpm_limit)
 
+            # Don't pass response_model when tools are present to avoid
+            # conflicting with tool-calling (see issue #5472).
+            effective_response_model = (
+                None if self.original_tools else self.response_model
+            )
+
             answer = await aget_llm_response(
                 llm=cast("BaseLLM", self.llm),
                 messages=self.messages,
@@ -1149,12 +1168,12 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 printer=PRINTER,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=effective_response_model,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
 
-            if self.response_model is not None:
+            if effective_response_model is not None:
                 try:
                     if isinstance(answer, BaseModel):
                         output_json = answer.model_dump_json()
@@ -1286,6 +1305,8 @@ class CrewAgentExecutor(BaseAgentExecutor):
 
             enforce_rpm_limit(self.request_within_rpm_limit)
 
+            # Never pass response_model in the native tool-calling
+            # loop (see issue #5472).
             answer = await aget_llm_response(
                 llm=cast("BaseLLM", self.llm),
                 messages=self.messages,
@@ -1295,7 +1316,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 available_functions=None,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=None,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
experimental/agent_executor.py

@@ -1222,6 +1222,12 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignor
         try:
             enforce_rpm_limit(self.request_within_rpm_limit)
 
+            # Don't pass response_model when tools are present to avoid
+            # conflicting with tool-calling (see issue #5472).
+            effective_response_model = (
+                None if self.original_tools else self.response_model
+            )
+
             answer = get_llm_response(
                 llm=self.llm,
                 messages=list(self.state.messages),
@@ -1229,7 +1235,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignor
                 printer=PRINTER,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=effective_response_model,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
@@ -1307,7 +1313,8 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignor
 
             enforce_rpm_limit(self.request_within_rpm_limit)
 
-            # Call LLM with native tools
+            # Call LLM with native tools — never pass response_model
+            # in the native tool-calling loop (see issue #5472).
             answer = get_llm_response(
                 llm=self.llm,
                 messages=list(self.state.messages),
@@ -1317,7 +1324,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignor
                 available_functions=None,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=None,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
@@ -2044,3 +2044,185 @@ class TestVisionImageFormatContract:
         assert hasattr(AnthropicCompletion, "_convert_image_blocks"), (
             "Anthropic provider must have _convert_image_blocks for auto-conversion"
         )
+
+
+# =========================================================================
+# response_model must not leak into the tool-calling loop (issue #5472)
+# =========================================================================
+
+
+class TestResponseModelNotLeakedDuringToolLoop:
+    """Regression tests for issue #5472.
+
+    When output_pydantic / response_model is set on a task, the response_model
+    must NOT be forwarded to the LLM during tool-calling loop iterations.
+    Passing both `tools` and `response_format` causes many non-OpenAI LLMs to
+    skip tool calls entirely. Structured output conversion should happen only
+    as post-processing via Task._export_output().
+    """
+
+    # -- helpers ----------------------------------------------------------
+
+    @pytest.fixture
+    def mock_deps_with_tools(self):
+        """Dependencies that include original_tools (simulating an agent with tools)."""
+        from pydantic import BaseModel as _BaseModel
+
+        class DummyOutput(_BaseModel):
+            result: str
+
+        llm = Mock()
+        llm.stop = []
+        llm.supports_stop_words.return_value = True
+
+        task = Mock()
+        task.description = "Test task"
+        task.human_input = False
+        task.response_model = DummyOutput
+
+        crew = Mock()
+        crew.verbose = False
+        crew._train = False
+
+        agent = Mock()
+        agent.id = "test-agent"
+        agent.role = "Tester"
+        agent.verbose = False
+        agent.key = "test-key"
+
+        dummy_tool = Mock()
+        dummy_tool.name = "search"
+        dummy_tool.description = "Search tool"
+
+        return {
+            "llm": llm,
+            "task": task,
+            "crew": crew,
+            "agent": agent,
+            "prompt": {"prompt": "Test {input} {tool_names} {tools}"},
+            "max_iter": 10,
+            "tools": [dummy_tool],
+            "tools_names": "search",
+            "stop_words": ["Observation"],
+            "tools_description": "search: Search tool",
+            "tools_handler": Mock(spec=_ToolsHandler),
+            "original_tools": [dummy_tool],
+            "response_model": DummyOutput,
+        }
+
+    @pytest.fixture
+    def mock_deps_no_tools(self):
+        """Dependencies WITHOUT tools — response_model should still be passed."""
+        from pydantic import BaseModel as _BaseModel
+
+        class DummyOutput(_BaseModel):
+            result: str
+
+        llm = Mock()
+        llm.stop = []
+        llm.supports_stop_words.return_value = True
+
+        task = Mock()
+        task.description = "Test task"
+        task.human_input = False
+        task.response_model = DummyOutput
+
+        crew = Mock()
+        crew.verbose = False
+        crew._train = False
+
+        agent = Mock()
+        agent.id = "test-agent"
+        agent.role = "Tester"
+        agent.verbose = False
+        agent.key = "test-key"
+
+        return {
+            "llm": llm,
+            "task": task,
+            "crew": crew,
+            "agent": agent,
+            "prompt": {"prompt": "Test {input} {tool_names} {tools}"},
+            "max_iter": 10,
+            "tools": [],
+            "tools_names": "",
+            "stop_words": [],
+            "tools_description": "",
+            "tools_handler": Mock(spec=_ToolsHandler),
+            "original_tools": [],
+            "response_model": DummyOutput,
+        }
+
+    # -- experimental executor: call_llm_and_parse (ReAct path) -----------
+
+    @patch("crewai.experimental.agent_executor.get_llm_response")
+    @patch("crewai.experimental.agent_executor.enforce_rpm_limit")
+    def test_call_llm_and_parse_omits_response_model_when_tools_present(
+        self, mock_enforce_rpm, mock_get_llm, mock_deps_with_tools,
+    ):
+        """call_llm_and_parse must pass response_model=None when tools exist."""
+        mock_enforce_rpm.return_value = None
+        mock_get_llm.return_value = (
+            "Thought: I need to search\n"
+            "Action: search\n"
+            "Action Input: {\"query\": \"test\"}\n"
+        )
+
+        executor = _build_executor(**mock_deps_with_tools)
+        executor.call_llm_and_parse()
+
+        # Verify get_llm_response was called with response_model=None
+        call_kwargs = mock_get_llm.call_args
+        assert call_kwargs is not None
+        assert call_kwargs.kwargs.get("response_model") is None or (
+            len(call_kwargs.args) > 7 and call_kwargs.args[7] is None
+        ), "response_model must be None when tools are present"
+
@patch("crewai.experimental.agent_executor.get_llm_response")
|
||||
@patch("crewai.experimental.agent_executor.enforce_rpm_limit")
|
||||
def test_call_llm_and_parse_keeps_response_model_when_no_tools(
|
||||
self, mock_enforce_rpm, mock_get_llm, mock_deps_no_tools,
|
||||
):
|
||||
"""call_llm_and_parse should still pass response_model when no tools."""
|
||||
from pydantic import BaseModel as _BaseModel
|
||||
|
||||
mock_enforce_rpm.return_value = None
|
||||
# Return a BaseModel-like response when response_model is passed
|
||||
mock_get_llm.return_value = '{"result": "done"}'
|
||||
|
||||
executor = _build_executor(**mock_deps_no_tools)
|
||||
executor.call_llm_and_parse()
|
||||
|
||||
call_kwargs = mock_get_llm.call_args
|
||||
assert call_kwargs is not None
|
||||
rm = call_kwargs.kwargs.get("response_model")
|
||||
assert rm is not None, (
|
||||
"response_model should be passed when no tools are present"
|
||||
)
|
||||
|
||||
+    # -- experimental executor: call_llm_native_tools ---------------------
+
+    @patch("crewai.experimental.agent_executor.get_llm_response")
+    @patch("crewai.experimental.agent_executor.enforce_rpm_limit")
+    def test_call_llm_native_tools_omits_response_model(
+        self,
+        mock_enforce_rpm,
+        mock_get_llm,
+        mock_deps_with_tools,
+    ):
+        """call_llm_native_tools must always pass response_model=None."""
+        mock_enforce_rpm.return_value = None
+        mock_get_llm.return_value = "Final answer text"
+
+        executor = _build_executor(**mock_deps_with_tools)
+        # Pre-set the openai_tools attribute (normally done by _setup_native_tools)
+        executor._openai_tools = []
+        executor._available_functions = {}
+        executor._tool_name_mapping = {}
+        executor.call_llm_native_tools()
+
+        call_kwargs = mock_get_llm.call_args
+        assert call_kwargs is not None
+        assert call_kwargs.kwargs.get("response_model") is None, (
+            "response_model must be None in native tool-calling loop"
+        )