diff --git a/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor.py b/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor.py
index d251b1d36..24d68b0a3 100644
--- a/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/agent_builder/base_agent_executor.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
-from pydantic import BaseModel, Field, PrivateAttr
+from pydantic import BaseModel as PydanticBaseModel, Field, PrivateAttr
 
 from crewai.agents.parser import AgentFinish
 from crewai.memory.utils import sanitize_scope_name
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
     from crewai.task import Task
 
 
-class BaseAgentExecutor(BaseModel):
+class BaseAgentExecutor(PydanticBaseModel):
     model_config = {"arbitrary_types_allowed": True}
 
     executor_type: str = "base"
@@ -28,6 +28,26 @@ class BaseAgentExecutor(BaseModel):
     messages: list[LLMMessage] = Field(default_factory=list)
     _resuming: bool = PrivateAttr(default=False)
 
+    def _loop_response_model(self) -> type[PydanticBaseModel] | None:
+        """Return the response_model to forward to the LLM during a tool loop.
+
+        When the executor has tools, returns ``None``. Sending a strict
+        structured-output schema alongside ``tools=`` makes many providers
+        (notably OpenAI's Responses API with ``text.format`` json_schema)
+        constrain the first response to the schema, so the model emits
+        placeholder JSON instead of calling tools. Schema conversion is
+        applied as post-processing in ``Task._export_output()`` (Crew path)
+        or via ``Converter`` in ``Agent._build_output_from_result()``
+        (standalone kickoff path).
+
+        Subclasses must define ``original_tools`` and ``response_model``
+        attributes; this base implementation reads them via ``getattr`` so
+        the helper works for any executor that exposes those names.
+        """
+        if getattr(self, "original_tools", None):
+            return None
+        return getattr(self, "response_model", None)
+
     def _save_to_memory(self, output: AgentFinish) -> None:
         """Save task result to unified memory (memory or crew._memory)."""
         if self.agent is None:
diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py
index 62369bfb9..fba95ae5c 100644
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -334,6 +334,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
 
         enforce_rpm_limit(self.request_within_rpm_limit)
 
+        loop_response_model = self._loop_response_model()
         answer = get_llm_response(
             llm=cast("BaseLLM", self.llm),
             messages=self.messages,
@@ -341,11 +342,11 @@ class CrewAgentExecutor(BaseAgentExecutor):
             printer=PRINTER,
             from_task=self.task,
             from_agent=self.agent,
-            response_model=self.response_model,
+            response_model=loop_response_model,
             executor_context=self,
             verbose=self.agent.verbose,
         )
-        if self.response_model is not None:
+        if loop_response_model is not None:
             try:
                 if isinstance(answer, BaseModel):
                     output_json = answer.model_dump_json()
@@ -355,7 +356,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                         text=output_json,
                     )
                 else:
-                    self.response_model.model_validate_json(answer)
+                    loop_response_model.model_validate_json(answer)
                     formatted_answer = AgentFinish(
                         thought="",
                         output=answer,
@@ -486,7 +487,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
             available_functions=None,
             from_task=self.task,
             from_agent=self.agent,
-            response_model=self.response_model,
+            response_model=self._loop_response_model(),
             executor_context=self,
             verbose=self.agent.verbose,
         )
@@ -1142,6 +1143,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
 
         enforce_rpm_limit(self.request_within_rpm_limit)
 
+        loop_response_model = self._loop_response_model()
         answer = await aget_llm_response(
             llm=cast("BaseLLM", self.llm),
             messages=self.messages,
@@ -1149,12 +1151,12 @@ class CrewAgentExecutor(BaseAgentExecutor):
             printer=PRINTER,
             from_task=self.task,
             from_agent=self.agent,
-            response_model=self.response_model,
+            response_model=loop_response_model,
             executor_context=self,
             verbose=self.agent.verbose,
         )
 
-        if self.response_model is not None:
+        if loop_response_model is not None:
             try:
                 if isinstance(answer, BaseModel):
                     output_json = answer.model_dump_json()
@@ -1164,7 +1166,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
                         text=output_json,
                     )
                 else:
-                    self.response_model.model_validate_json(answer)
+                    loop_response_model.model_validate_json(answer)
                     formatted_answer = AgentFinish(
                         thought="",
                         output=answer,
@@ -1295,7 +1297,7 @@ class CrewAgentExecutor(BaseAgentExecutor):
             available_functions=None,
             from_task=self.task,
             from_agent=self.agent,
-            response_model=self.response_model,
+            response_model=self._loop_response_model(),
             executor_context=self,
             verbose=self.agent.verbose,
         )
diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py
index ef33fab43..8445c5965 100644
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -1229,7 +1229,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignor
             printer=PRINTER,
             from_task=self.task,
             from_agent=self.agent,
-            response_model=self.response_model,
+            response_model=self._loop_response_model(),
             executor_context=self,
             verbose=self.agent.verbose,
         )
@@ -1317,7 +1317,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):  # type: ignor
             available_functions=None,
             from_task=self.task,
             from_agent=self.agent,
-            response_model=self.response_model,
+            response_model=self._loop_response_model(),
             executor_context=self,
             verbose=self.agent.verbose,
         )
diff --git a/lib/crewai/tests/agents/test_agent_executor.py b/lib/crewai/tests/agents/test_agent_executor.py
index 3413e30ac..c965824df 100644
--- a/lib/crewai/tests/agents/test_agent_executor.py
+++ b/lib/crewai/tests/agents/test_agent_executor.py
@@ -2044,3 +2044,103 @@ class TestVisionImageFormatContract:
         assert hasattr(AnthropicCompletion, "_convert_image_blocks"), (
             "Anthropic provider must have _convert_image_blocks for auto-conversion"
         )
+
+
+class TestLoopResponseModelGuard:
+    """Regression: response_model must not be sent to the LLM during a tool loop.
+
+    Bug: when output_pydantic + tools are both set, OpenAI's Responses API
+    (and several other providers) constrain the first response to the
+    structured-output schema, so the model emits placeholder JSON instead of
+    calling tools. Schema conversion is post-processed, so the LLM call must
+    omit response_model whenever tools are present.
+    """
+
+    def _make_pydantic_model(self):
+        from pydantic import BaseModel as _BaseModel
+
+        class _Out(_BaseModel):
+            answer: str
+
+        return _Out
+
+    def _crew_executor(self, *, with_tools: bool, response_model):
+        from crewai.agents.crew_agent_executor import CrewAgentExecutor
+
+        tool = Mock()
+        tool.name = "search"
+        tool.description = "search"
+
+        executor = CrewAgentExecutor.model_construct(
+            response_model=response_model,
+            original_tools=[tool] if with_tools else [],
+        )
+        return executor
+
+    def test_crew_helper_strips_when_tools_present(self):
+        Model = self._make_pydantic_model()
+        ex = self._crew_executor(with_tools=True, response_model=Model)
+        assert ex._loop_response_model() is None
+
+    def test_crew_helper_preserves_when_no_tools(self):
+        Model = self._make_pydantic_model()
+        ex = self._crew_executor(with_tools=False, response_model=Model)
+        assert ex._loop_response_model() is Model
+
+    def test_crew_helper_returns_none_when_no_response_model(self):
+        ex = self._crew_executor(with_tools=False, response_model=None)
+        assert ex._loop_response_model() is None
+
+    def test_experimental_helper_strips_when_tools_present(self):
+        Model = self._make_pydantic_model()
+        llm = Mock()
+        llm.supports_stop_words.return_value = False
+        tool = Mock()
+        tool.name = "search"
+        ex = _build_executor(llm=llm, response_model=Model, original_tools=[tool])
+        assert ex._loop_response_model() is None
+
+    def test_experimental_helper_preserves_when_no_tools(self):
+        Model = self._make_pydantic_model()
+        llm = Mock()
+        llm.supports_stop_words.return_value = False
+        ex = _build_executor(llm=llm, response_model=Model, original_tools=[])
+        assert ex._loop_response_model() is Model
+
+    def test_loop_call_sites_route_through_helper(self):
+        """Source-level guard: every in-loop LLM call must use the helper.
+
+        This catches regressions where someone reintroduces
+        `response_model=self.response_model` inside a tool loop.
+        """
+        import inspect
+        from crewai.agents import crew_agent_executor as _crew_mod
+        from crewai.experimental import agent_executor as _exp_mod
+
+        for mod, names in (
+            (
+                _crew_mod,
+                (
+                    "_invoke_loop_react",
+                    "_invoke_loop_native_tools",
+                    "_ainvoke_loop_react",
+                    "_ainvoke_loop_native_tools",
+                ),
+            ),
+            (
+                _exp_mod,
+                ("call_llm_and_parse", "call_llm_native_tools"),
+            ),
+        ):
+            cls = (
+                _crew_mod.CrewAgentExecutor
+                if mod is _crew_mod
+                else _exp_mod.AgentExecutor
+            )
+            for name in names:
+                src = inspect.getsource(getattr(cls, name))
+                assert "response_model=self.response_model" not in src, (
+                    f"{cls.__name__}.{name} forwards self.response_model directly. "
+                    "Use self._loop_response_model() instead — see "
+                    "TestLoopResponseModelGuard for the bug it prevents."
+                )