# crewAI/lib/crewai/tests/agents/test_react_output_pydantic.py

"""Tests for output_pydantic behavior in ReAct flow when LLM doesn't support function calling.

Regression tests for https://github.com/crewAIInc/crewAI/issues/4695

When an LLM does NOT support function calling (supports_function_calling() returns False),
the executor should use the ReAct text-based pattern. In this path, response_model should
NOT be passed to the LLM call, because doing so forces structured output (via instructor/
tools mode) before the agent can reason through the Action/Observation loop.

The schema should still be embedded in the prompt text for guidance, and the final
conversion to pydantic/json should happen in task._export_output() after the ReAct loop.
"""

from __future__ import annotations

from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from pydantic import BaseModel, Field

from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.agents.parser import AgentFinish


# ---------------------------------------------------------------------------
# Pydantic models used as output_pydantic in tests
# ---------------------------------------------------------------------------


class PersonInfo(BaseModel):
    """A simple pydantic model for testing output_pydantic."""

    # Passed as ``response_model`` to the executor helper and as
    # ``output_pydantic`` to ``Task`` in the tests of this module.
    # NOTE(review): the docstring and Field descriptions are presumably
    # surfaced in the schema pydantic generates for this model, so keep them
    # stable unless a test is meant to change with them — confirm.
    name: str = Field(description="Person's name")
    age: int = Field(description="Person's age")


class WeatherReport(BaseModel):
    """Another pydantic model for testing output_pydantic."""

    # Second, structurally different model (three fields, one of them a
    # float) passed as ``response_model`` by some executor tests in this
    # module, so the checks do not depend on a single schema.
    city: str = Field(description="City name")
    temperature: float = Field(description="Temperature in Fahrenheit")
    condition: str = Field(description="Weather condition")


# ---------------------------------------------------------------------------
# Helper factories (plain functions, not pytest fixtures)
# ---------------------------------------------------------------------------


def _make_llm(*, supports_fc: bool) -> MagicMock:
    """Create a mock LLM with configurable function-calling support."""
    llm = MagicMock()
    llm.supports_function_calling.return_value = supports_fc
    llm.supports_stop_words.return_value = True
    llm.stop = []
    return llm


def _make_executor(
    llm: MagicMock,
    *,
    response_model: type[BaseModel] | None = None,
) -> CrewAgentExecutor:
    """Assemble a CrewAgentExecutor around mocked agent, task and crew objects.

    Args:
        llm: Mock LLM handed to the executor.
        response_model: Optional pydantic model forwarded unchanged as the
            executor's ``response_model``.

    Returns:
        An executor configured with no tools, ``max_iter=5`` and the single
        stop word ``"Observation:"``.
    """
    mock_agent = MagicMock(
        role="Test Agent",
        key="test_agent_key",
        verbose=False,
        id="test_agent_id",
    )
    mock_task = MagicMock(description="Test task")
    mock_crew = MagicMock(verbose=False, _train=False)

    return CrewAgentExecutor(
        llm=llm,
        task=mock_task,
        crew=mock_crew,
        agent=mock_agent,
        prompt={"prompt": "Test prompt {input} {tool_names} {tools}"},
        max_iter=5,
        tools=[],
        tools_names="",
        stop_words=["Observation:"],
        tools_description="",
        tools_handler=MagicMock(),
        response_model=response_model,
    )


# ===========================================================================
# Sync tests
# ===========================================================================


class TestReActFlowDoesNotPassResponseModel:
    """Sync executor tests: routing of _invoke_loop and the response_model
    value handed to get_llm_response on the ReAct path."""

    def test_react_flow_passes_none_response_model_when_output_pydantic_set(
        self,
    ) -> None:
        """With a response_model configured and an LLM without function
        calling, the patched get_llm_response must see response_model=None."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: I know the answer\nFinal Answer: John is 30 years old",
        ) as llm_response_mock, patch.object(executor, "_show_logs"):
            outcome = executor._invoke_loop()

        # Only the keyword arguments of the most recent call are inspected.
        passed_model = llm_response_mock.call_args.kwargs.get("response_model")
        assert passed_model is None, (
            "response_model should be None in ReAct flow, but got "
            f"{passed_model}"
        )
        assert isinstance(outcome, AgentFinish)

    def test_react_flow_does_not_use_instructor_for_non_fc_llm(self) -> None:
        """InternalInstructor (patched at its defining module) must stay
        uncalled while the loop runs for an LLM without function calling."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=WeatherReport
        )

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: I found the weather\nFinal Answer: It is sunny in NYC at 72F",
        ), patch.object(executor, "_show_logs"), patch(
            "crewai.utilities.internal_instructor.InternalInstructor"
        ) as instructor_mock:
            executor._invoke_loop()

        instructor_mock.assert_not_called()

    def test_invoke_loop_routes_to_react_when_no_function_calling(self) -> None:
        """supports_function_calling() == False: _invoke_loop must delegate
        to _invoke_loop_react and leave _invoke_loop_native_tools alone."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )
        finish = AgentFinish(thought="done", output="test", text="Final Answer: test")

        with patch.object(
            executor, "_invoke_loop_react", return_value=finish
        ) as react_mock, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as native_mock:
            executor._invoke_loop()

        react_mock.assert_called_once()
        native_mock.assert_not_called()

    def test_invoke_loop_routes_to_native_when_function_calling_supported(
        self,
    ) -> None:
        """supports_function_calling() == True with a non-empty
        original_tools list: _invoke_loop_native_tools is the chosen path."""
        executor = _make_executor(
            _make_llm(supports_fc=True), response_model=PersonInfo
        )
        # The native path is only expected when at least one tool exists.
        executor.original_tools = [MagicMock()]
        finish = AgentFinish(thought="done", output="test", text="Final Answer: test")

        with patch.object(
            executor, "_invoke_loop_native_tools", return_value=finish
        ) as native_mock, patch.object(
            executor, "_invoke_loop_react"
        ) as react_mock:
            executor._invoke_loop()

        native_mock.assert_called_once()
        react_mock.assert_not_called()

    def test_invoke_loop_routes_to_native_no_tools_when_fc_no_tools_with_response_model(
        self,
    ) -> None:
        """Function calling supported, both tool lists empty, response_model
        set: only _invoke_loop_native_no_tools may be called."""
        executor = _make_executor(
            _make_llm(supports_fc=True), response_model=PersonInfo
        )
        # Neither user-defined nor internal tools are present.
        executor.original_tools = []
        executor.tools = []
        finish = AgentFinish(thought="done", output="test", text="Final Answer: test")

        with patch.object(
            executor, "_invoke_loop_native_no_tools", return_value=finish
        ) as native_no_tools_mock, patch.object(
            executor, "_invoke_loop_react"
        ) as react_mock, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as native_mock:
            executor._invoke_loop()

        native_no_tools_mock.assert_called_once()
        react_mock.assert_not_called()
        native_mock.assert_not_called()

    def test_invoke_loop_routes_to_react_when_fc_no_orig_tools_but_internal_tools(
        self,
    ) -> None:
        """Function calling supported, original_tools empty but tools
        non-empty (e.g. a delegation tool), response_model set: the ReAct
        loop is expected and both native variants must stay untouched."""
        executor = _make_executor(
            _make_llm(supports_fc=True), response_model=PersonInfo
        )
        executor.original_tools = []
        # Stand-in for an internal tool such as delegation.
        executor.tools = [MagicMock()]
        finish = AgentFinish(thought="done", output="test", text="Final Answer: test")

        with patch.object(
            executor, "_invoke_loop_react", return_value=finish
        ) as react_mock, patch.object(
            executor, "_invoke_loop_native_no_tools"
        ) as native_no_tools_mock, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as native_mock:
            executor._invoke_loop()

        react_mock.assert_called_once()
        native_no_tools_mock.assert_not_called()
        native_mock.assert_not_called()

    def test_invoke_loop_routes_to_react_when_fc_no_tools_no_response_model(
        self,
    ) -> None:
        """Function calling supported, no tools at all and no response_model:
        the ReAct loop is expected and both native variants stay untouched."""
        executor = _make_executor(_make_llm(supports_fc=True), response_model=None)
        executor.original_tools = []
        executor.tools = []
        finish = AgentFinish(thought="done", output="test", text="Final Answer: test")

        with patch.object(
            executor, "_invoke_loop_react", return_value=finish
        ) as react_mock, patch.object(
            executor, "_invoke_loop_native_no_tools"
        ) as native_no_tools_mock, patch.object(
            executor, "_invoke_loop_native_tools"
        ) as native_mock:
            executor._invoke_loop()

        react_mock.assert_called_once()
        native_no_tools_mock.assert_not_called()
        native_mock.assert_not_called()

    def test_react_flow_still_works_with_tool_usage(self) -> None:
        """An Action step followed by a Final Answer step must finish in
        exactly two LLM calls, each of them made with response_model=None."""
        from crewai.agents.parser import AgentAction
        from crewai.tools.tool_types import ToolResult

        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )

        # One entry per fake LLM call; its length doubles as the call counter.
        recorded_calls: list[dict[str, Any]] = []

        def fake_llm_response(*args: Any, **kwargs: Any) -> str:
            recorded_calls.append(kwargs)
            attempt = len(recorded_calls)
            # response_model has to be absent/None on every single call.
            assert kwargs.get("response_model") is None, (
                f"response_model should be None in ReAct flow (call {attempt})"
            )
            if attempt != 1:
                return (
                    "Thought: I found the person info\n"
                    "Final Answer: John Doe is 30 years old"
                )
            return (
                "Thought: I need to search for the person\n"
                "Action: search_tool\n"
                'Action Input: {"query": "John Doe"}'
            )

        handled_action = AgentAction(
            text="Tool result",
            tool="search_tool",
            tool_input='{"query": "John Doe"}',
            thought="Used tool",
            result="John Doe, age 30",
        )

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            side_effect=fake_llm_response,
        ), patch(
            "crewai.agents.crew_agent_executor.execute_tool_and_check_finality",
            return_value=ToolResult(result="John Doe, age 30", result_as_answer=False),
        ), patch.object(executor, "_show_logs"), patch.object(
            executor, "_handle_agent_action", return_value=handled_action
        ):
            outcome = executor._invoke_loop()

        assert isinstance(outcome, AgentFinish)
        total_calls = len(recorded_calls)
        assert total_calls == 2, f"Expected 2 LLM calls, got {total_calls}"

    def test_react_flow_without_response_model_unchanged(self) -> None:
        """Baseline: with no response_model configured the loop still ends
        in an AgentFinish and the LLM call carries response_model=None."""
        executor = _make_executor(_make_llm(supports_fc=False), response_model=None)

        with patch(
            "crewai.agents.crew_agent_executor.get_llm_response",
            return_value="Thought: Simple answer\nFinal Answer: Hello world",
        ) as llm_response_mock, patch.object(executor, "_show_logs"):
            outcome = executor._invoke_loop()

        assert llm_response_mock.call_args.kwargs.get("response_model") is None
        assert isinstance(outcome, AgentFinish)


# ===========================================================================
# Async tests
# ===========================================================================


class TestAsyncReActFlowDoesNotPassResponseModel:
    """Async executor tests: routing of _ainvoke_loop and the response_model
    value handed to aget_llm_response on the ReAct path."""

    @pytest.mark.asyncio
    async def test_async_react_flow_passes_none_response_model(self) -> None:
        """The patched aget_llm_response must be awaited with
        response_model=None even though the executor has a response_model."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )

        with patch(
            "crewai.agents.crew_agent_executor.aget_llm_response",
            new_callable=AsyncMock,
            return_value="Thought: I know\nFinal Answer: John is 30",
        ) as allm_response_mock, patch.object(executor, "_show_logs"):
            outcome = await executor._ainvoke_loop()

        # Only the keyword arguments of the most recent call are inspected.
        assert allm_response_mock.call_args.kwargs.get("response_model") is None, (
            "response_model should be None in async ReAct flow"
        )
        assert isinstance(outcome, AgentFinish)

    @pytest.mark.asyncio
    async def test_async_invoke_loop_routes_to_react_when_no_fc(self) -> None:
        """supports_function_calling() == False: _ainvoke_loop must delegate
        to _ainvoke_loop_react and never to _ainvoke_loop_native_tools."""
        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=WeatherReport
        )
        finish = AgentFinish(thought="done", output="test", text="Final Answer: test")

        with patch.object(
            executor,
            "_ainvoke_loop_react",
            new_callable=AsyncMock,
            return_value=finish,
        ) as react_mock, patch.object(
            executor, "_ainvoke_loop_native_tools"
        ) as native_mock:
            await executor._ainvoke_loop()

        react_mock.assert_called_once()
        native_mock.assert_not_called()

    @pytest.mark.asyncio
    async def test_async_react_flow_with_tool_usage(self) -> None:
        """An Action step followed by a Final Answer step must finish in
        exactly two awaited LLM calls, each with response_model=None."""
        from crewai.agents.parser import AgentAction
        from crewai.tools.tool_types import ToolResult

        executor = _make_executor(
            _make_llm(supports_fc=False), response_model=PersonInfo
        )

        # One entry per fake LLM call; its length doubles as the call counter.
        recorded_calls: list[dict[str, Any]] = []

        async def fake_llm_response(*args: Any, **kwargs: Any) -> str:
            recorded_calls.append(kwargs)
            assert kwargs.get("response_model") is None
            if len(recorded_calls) != 1:
                return "Thought: Done\nFinal Answer: Result found"
            return (
                "Thought: I need to search\n"
                "Action: search_tool\n"
                'Action Input: {"query": "test"}'
            )

        handled_action = AgentAction(
            text="Tool result",
            tool="search_tool",
            tool_input='{"query": "test"}',
            thought="Searching",
            result="Found it",
        )

        with patch(
            "crewai.agents.crew_agent_executor.aget_llm_response",
            new_callable=AsyncMock,
            side_effect=fake_llm_response,
        ), patch(
            "crewai.agents.crew_agent_executor.aexecute_tool_and_check_finality",
            new_callable=AsyncMock,
            return_value=ToolResult(result="Found it", result_as_answer=False),
        ), patch.object(executor, "_show_logs"), patch.object(
            executor, "_handle_agent_action", return_value=handled_action
        ):
            outcome = await executor._ainvoke_loop()

        assert isinstance(outcome, AgentFinish)
        assert len(recorded_calls) == 2


# ===========================================================================
# Integration-style tests (Crew-level)
# ===========================================================================


class TestCrewLevelOutputPydanticWithNonFCModel:
    """Crew-level checks: kick off a one-agent, one-task Crew whose task sets
    output_pydantic, using a fully mocked LLM."""

    def test_crew_output_pydantic_with_non_fc_llm_uses_react(self) -> None:
        """With an LLM that reports no function-calling support, every
        recorded llm.call must have been made without a response_model."""
        from crewai import Agent, Crew, Task

        # A well-formed ReAct final answer carrying the JSON payload.
        react_answer = (
            "Thought: I know the answer\n"
            'Final Answer: {"name": "John Doe", "age": 30}'
        )
        llm = MagicMock(stop=[], model="ollama/llama3")
        llm.configure_mock(
            **{
                "supports_function_calling.return_value": False,
                "supports_stop_words.return_value": True,
                "call.return_value": react_answer,
            }
        )

        # create_llm is patched so Agent.__init__ never builds a real LLM.
        with patch("crewai.agent.core.create_llm", return_value=llm):
            researcher = Agent(
                role="Researcher",
                goal="Find person info",
                backstory="You research people.",
                llm=llm,
                verbose=False,
            )
            lookup = Task(
                description="Find info about John Doe",
                expected_output="Person info as JSON",
                agent=researcher,
                output_pydantic=PersonInfo,
            )
            outcome = Crew(
                agents=[researcher], tasks=[lookup], verbose=False
            ).kickoff()

        # The mocked LLM has to have been used at least once ...
        assert llm.call.call_count >= 1

        # ... and none of those calls may carry a response_model keyword.
        for recorded in llm.call.call_args_list:
            passed_model = recorded.kwargs.get("response_model")
            assert passed_model is None, (
                f"response_model should be None for non-FC LLM, got {passed_model}"
            )

        assert outcome is not None

    def test_crew_output_pydantic_with_fc_llm_uses_native_tools(self) -> None:
        """Smoke test for an LLM that reports function-calling support: the
        only assertion is that kickoff() returns a non-None result."""
        from crewai import Agent, Crew, Task

        llm = MagicMock(stop=[], model="gpt-4o-mini")
        llm.configure_mock(
            **{
                "supports_function_calling.return_value": True,
                "supports_stop_words.return_value": True,
                # Plain JSON answer, no tool calls involved.
                "call.return_value": '{"name": "Jane Doe", "age": 25}',
            }
        )

        # create_llm is patched so Agent.__init__ never builds a real LLM.
        with patch("crewai.agent.core.create_llm", return_value=llm):
            researcher = Agent(
                role="Researcher",
                goal="Find person info",
                backstory="You research people.",
                llm=llm,
                verbose=False,
            )
            lookup = Task(
                description="Find info about Jane Doe",
                expected_output="Person info as JSON",
                agent=researcher,
                output_pydantic=PersonInfo,
            )
            outcome = Crew(
                agents=[researcher], tasks=[lookup], verbose=False
            ).kickoff()

        assert outcome is not None