Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-05-04 16:52:37 +00:00)
refactor: implement _loop_response_model to manage response_model during tool loops

This change introduces the _loop_response_model method on BaseAgentExecutor, which returns the configured response_model only when the executor has no tools. The CrewAgentExecutor and AgentExecutor classes now route every in-loop LLM call through this helper, so a structured-output schema is never sent to the LLM alongside tools; otherwise many providers constrain the first response to the schema and the model emits placeholder JSON instead of calling tools. Tests verify the helper's behavior and guard against reintroducing the direct response_model pass-through.
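For orientation before the diff: a minimal, self-contained sketch of the guard this commit adds. _StubExecutor below is a hypothetical stand-in for the attribute contract the real helper reads (original_tools and response_model), not crewAI code; the actual implementation lives on BaseAgentExecutor in the first diff hunks.

from pydantic import BaseModel


class Out(BaseModel):
    answer: str


class _StubExecutor:
    # Hypothetical stand-in, not crewAI code: mirrors the attribute
    # contract (original_tools, response_model) the real helper reads.
    def __init__(self, original_tools, response_model):
        self.original_tools = original_tools
        self.response_model = response_model

    def _loop_response_model(self):
        # With tools present, withhold the schema so the model is free
        # to emit tool calls; schema conversion happens afterwards.
        if getattr(self, "original_tools", None):
            return None
        return getattr(self, "response_model", None)


assert _StubExecutor(["search"], Out)._loop_response_model() is None
assert _StubExecutor([], Out)._loop_response_model() is Out
assert _StubExecutor(["search"], None)._loop_response_model() is None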
@@ -2,7 +2,7 @@ from __future__ import annotations

 from typing import TYPE_CHECKING

-from pydantic import BaseModel, Field, PrivateAttr
+from pydantic import BaseModel as PydanticBaseModel, Field, PrivateAttr

 from crewai.agents.parser import AgentFinish
 from crewai.memory.utils import sanitize_scope_name
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
     from crewai.task import Task


-class BaseAgentExecutor(BaseModel):
+class BaseAgentExecutor(PydanticBaseModel):
     model_config = {"arbitrary_types_allowed": True}

     executor_type: str = "base"
@@ -28,6 +28,26 @@ class BaseAgentExecutor(BaseModel):
     messages: list[LLMMessage] = Field(default_factory=list)
     _resuming: bool = PrivateAttr(default=False)

+    def _loop_response_model(self) -> type[PydanticBaseModel] | None:
+        """Return the response_model to forward to the LLM during a tool loop.
+
+        When the executor has tools, returns ``None``. Sending a strict
+        structured-output schema alongside ``tools=`` makes many providers
+        (notably OpenAI's Responses API with ``text.format`` json_schema)
+        constrain the first response to the schema, so the model emits
+        placeholder JSON instead of calling tools. Schema conversion is
+        applied as post-processing in ``Task._export_output()`` (Crew path)
+        or via ``Converter`` in ``Agent._build_output_from_result()``
+        (standalone kickoff path).
+
+        Subclasses must define ``original_tools`` and ``response_model``
+        attributes; this base implementation reads them via ``getattr`` so
+        the helper works for any executor that exposes those names.
+        """
+        if getattr(self, "original_tools", None):
+            return None
+        return getattr(self, "response_model", None)
+
     def _save_to_memory(self, output: AgentFinish) -> None:
         """Save task result to unified memory (memory or crew._memory)."""
         if self.agent is None:
@@ -334,6 +334,7 @@ class CrewAgentExecutor(BaseAgentExecutor):

             enforce_rpm_limit(self.request_within_rpm_limit)

+            loop_response_model = self._loop_response_model()
             answer = get_llm_response(
                 llm=cast("BaseLLM", self.llm),
                 messages=self.messages,
@@ -341,11 +342,11 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 printer=PRINTER,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=loop_response_model,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
-            if self.response_model is not None:
+            if loop_response_model is not None:
                 try:
                     if isinstance(answer, BaseModel):
                         output_json = answer.model_dump_json()
@@ -355,7 +356,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                             text=output_json,
                         )
                     else:
-                        self.response_model.model_validate_json(answer)
+                        loop_response_model.model_validate_json(answer)
                     formatted_answer = AgentFinish(
                         thought="",
                         output=answer,
@@ -486,7 +487,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 available_functions=None,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=self._loop_response_model(),
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
@@ -1142,6 +1143,7 @@ class CrewAgentExecutor(BaseAgentExecutor):

             enforce_rpm_limit(self.request_within_rpm_limit)

+            loop_response_model = self._loop_response_model()
             answer = await aget_llm_response(
                 llm=cast("BaseLLM", self.llm),
                 messages=self.messages,
@@ -1149,12 +1151,12 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 printer=PRINTER,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=loop_response_model,
                 executor_context=self,
                 verbose=self.agent.verbose,
             )

-            if self.response_model is not None:
+            if loop_response_model is not None:
                 try:
                     if isinstance(answer, BaseModel):
                         output_json = answer.model_dump_json()
@@ -1164,7 +1166,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                             text=output_json,
                         )
                     else:
-                        self.response_model.model_validate_json(answer)
+                        loop_response_model.model_validate_json(answer)
                     formatted_answer = AgentFinish(
                         thought="",
                         output=answer,
@@ -1295,7 +1297,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                 available_functions=None,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=self._loop_response_model(),
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
@@ -1229,7 +1229,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignore
                 printer=PRINTER,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=self._loop_response_model(),
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
@@ -1317,7 +1317,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignore
                 available_functions=None,
                 from_task=self.task,
                 from_agent=self.agent,
-                response_model=self.response_model,
+                response_model=self._loop_response_model(),
                 executor_context=self,
                 verbose=self.agent.verbose,
             )
@@ -2044,3 +2044,103 @@ class TestVisionImageFormatContract:
         assert hasattr(AnthropicCompletion, "_convert_image_blocks"), (
             "Anthropic provider must have _convert_image_blocks for auto-conversion"
         )
+
+
+class TestLoopResponseModelGuard:
+    """Regression: response_model must not be sent to the LLM during a tool loop.
+
+    Bug: when output_pydantic + tools are both set, OpenAI's Responses API
+    (and several other providers) constrain the first response to the
+    structured-output schema, so the model emits placeholder JSON instead of
+    calling tools. Schema conversion is post-processed, so the LLM call must
+    omit response_model whenever tools are present.
+    """
+
+    def _make_pydantic_model(self):
+        from pydantic import BaseModel as _BaseModel
+
+        class _Out(_BaseModel):
+            answer: str
+
+        return _Out
+
+    def _crew_executor(self, *, with_tools: bool, response_model):
+        from crewai.agents.crew_agent_executor import CrewAgentExecutor
+
+        tool = Mock()
+        tool.name = "search"
+        tool.description = "search"
+
+        executor = CrewAgentExecutor.model_construct(
+            response_model=response_model,
+            original_tools=[tool] if with_tools else [],
+        )
+        return executor
+
+    def test_crew_helper_strips_when_tools_present(self):
+        Model = self._make_pydantic_model()
+        ex = self._crew_executor(with_tools=True, response_model=Model)
+        assert ex._loop_response_model() is None
+
+    def test_crew_helper_preserves_when_no_tools(self):
+        Model = self._make_pydantic_model()
+        ex = self._crew_executor(with_tools=False, response_model=Model)
+        assert ex._loop_response_model() is Model
+
+    def test_crew_helper_returns_none_when_no_response_model(self):
+        ex = self._crew_executor(with_tools=False, response_model=None)
+        assert ex._loop_response_model() is None
+
+    def test_experimental_helper_strips_when_tools_present(self):
+        Model = self._make_pydantic_model()
+        llm = Mock()
+        llm.supports_stop_words.return_value = False
+        tool = Mock()
+        tool.name = "search"
+        ex = _build_executor(llm=llm, response_model=Model, original_tools=[tool])
+        assert ex._loop_response_model() is None
+
+    def test_experimental_helper_preserves_when_no_tools(self):
+        Model = self._make_pydantic_model()
+        llm = Mock()
+        llm.supports_stop_words.return_value = False
+        ex = _build_executor(llm=llm, response_model=Model, original_tools=[])
+        assert ex._loop_response_model() is Model
+
+    def test_loop_call_sites_route_through_helper(self):
+        """Source-level guard: every in-loop LLM call must use the helper.
+
+        This catches regressions where someone reintroduces
+        `response_model=self.response_model` inside a tool loop.
+        """
+        import inspect
+
+        from crewai.agents import crew_agent_executor as _crew_mod
+        from crewai.experimental import agent_executor as _exp_mod
+
+        for mod, names in (
+            (
+                _crew_mod,
+                (
+                    "_invoke_loop_react",
+                    "_invoke_loop_native_tools",
+                    "_ainvoke_loop_react",
+                    "_ainvoke_loop_native_tools",
+                ),
+            ),
+            (
+                _exp_mod,
+                ("call_llm_and_parse", "call_llm_native_tools"),
+            ),
+        ):
+            cls = (
+                _crew_mod.CrewAgentExecutor
+                if mod is _crew_mod
+                else _exp_mod.AgentExecutor
+            )
+            for name in names:
+                src = inspect.getsource(getattr(cls, name))
+                assert "response_model=self.response_model" not in src, (
+                    f"{cls.__name__}.{name} forwards self.response_model directly. "
+                    "Use self._loop_response_model() instead — see "
+                    "TestLoopResponseModelGuard for the bug it prevents."
+                )
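For context, a hypothetical repro of the scenario the fix targets, sketched against crewAI's public Agent/Task/Crew API (assumes a configured LLM provider key in the environment; the lookup tool and all strings are illustrative, not from this commit). Before this change, combining output_pydantic with tools could make the first response emit placeholder JSON instead of a tool call; with the guard, the schema is applied only after the tool loop.

from pydantic import BaseModel

from crewai import Agent, Crew, Task
from crewai.tools import tool


class Answer(BaseModel):
    answer: str


@tool("lookup")
def lookup(query: str) -> str:
    """Return a canned lookup result (illustrative tool)."""
    return "42"


agent = Agent(
    role="Researcher",
    goal="Answer questions using the lookup tool",
    backstory="Illustrative agent for the repro.",
    tools=[lookup],
)
task = Task(
    description="Look up the answer, then report it.",
    expected_output="A short answer.",
    agent=agent,
    output_pydantic=Answer,  # schema now applied as post-processing, not in the tool loop
)
result = Crew(agents=[agent], tasks=[task]).kickoff()
print(result.pydantic)  # parsed Answer instance built from the final output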