From 10384152d323f7a6e375afb6e35056f13b1ac8ca Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 4 Mar 2026 12:36:59 +0000
Subject: [PATCH] fix: prevent tools from being discarded when response_model
 is set (#4697)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When both native function calling (tools) and output_pydantic (response_model)
are enabled, InternalInstructor was intercepting the LLM call and creating its
own completion without passing tools. This caused agent tools to be silently
discarded.

The fix checks whether tools are present in params before routing to
InternalInstructor. When tools exist, the normal litellm.completion path is
used so the LLM can see and call the agent's tools. InternalInstructor is
still used when no tools are present (backward compatible).

Applied to all three response handlers:
- _handle_non_streaming_response
- _ahandle_non_streaming_response
- _handle_streaming_response

Co-Authored-By: João <joao@crewai.com>
---
 lib/crewai/src/crewai/llm.py |  15 ++-
 lib/crewai/tests/test_llm.py | 199 +++++++++++++++++++++++++++++++++++
 2 files changed, 211 insertions(+), 3 deletions(-)

diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 8a4ac2edd..1c6a9b060 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -968,7 +968,10 @@ class LLM(BaseLLM):
             self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
 
             if not tool_calls or not available_functions:
-                if response_model and self.is_litellm:
+                # Only use InternalInstructor for structured output when there
+                # are no tools — otherwise tools would be silently discarded.
+                has_tools = bool(params.get("tools"))
+                if response_model and self.is_litellm and not has_tools:
                     instructor_instance = InternalInstructor(
                         content=full_response,
                         model=response_model,
@@ -1150,7 +1153,10 @@ class LLM(BaseLLM):
             str: The response text
         """
         # --- 1) Handle response_model with InternalInstructor for LiteLLM
-        if response_model and self.is_litellm:
+        # Skip InternalInstructor when tools are present so the LLM can
+        # see and call the agent's tools before returning structured output.
+        has_tools = bool(params.get("tools"))
+        if response_model and self.is_litellm and not has_tools:
             from crewai.utilities.internal_instructor import InternalInstructor
 
             messages = params.get("messages", [])
@@ -1290,7 +1296,10 @@ class LLM(BaseLLM):
         Returns:
             str: The response text
         """
-        if response_model and self.is_litellm:
+        # Skip InternalInstructor when tools are present so the LLM can
+        # see and call the agent's tools before returning structured output.
+        has_tools = bool(params.get("tools"))
+        if response_model and self.is_litellm and not has_tools:
             from crewai.utilities.internal_instructor import InternalInstructor
 
             messages = params.get("messages", [])
diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py
index 71cb69790..1b2066756 100644
--- a/lib/crewai/tests/test_llm.py
+++ b/lib/crewai/tests/test_llm.py
@@ -1022,3 +1022,202 @@ async def test_usage_info_streaming_with_acall():
     assert llm._token_usage["total_tokens"] > 0
 
     assert len(result) > 0
+
+
+# --- Tests for issue #4697: tools discarded when response_model is set ---
+
+
+class MyOutput(BaseModel):
+    name: str
+    value: str
+
+
+def _make_tool_call_response():
+    """Create a mock litellm response that contains a tool call."""
+    mock_message = MagicMock()
+    mock_message.content = ""
+    mock_tool_call = MagicMock()
+    mock_tool_call.function.name = "my_search_tool"
+    mock_tool_call.function.arguments = '{"query": "test"}'
+    mock_tool_call.id = "call_123"
+    mock_message.tool_calls = [mock_tool_call]
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+    return mock_response
+
+
+def _make_text_response(text: str = '{"name": "Alice", "value": "42"}'):
+    """Create a mock litellm response that contains only text content."""
+    mock_message = MagicMock()
+    mock_message.content = text
+    mock_message.tool_calls = []
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+    return mock_response
+
+
+def _get_tool_schema():
+    return {
+        "type": "function",
+        "function": {
+            "name": "my_search_tool",
+            "description": "Search for data",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {"type": "string", "description": "The search query"}
+                },
+                "required": ["query"],
+            },
+        },
+    }
+
+
+def test_non_streaming_response_model_with_tools_skips_instructor():
+    """When response_model AND tools are both present, InternalInstructor must NOT
+    be used so that LiteLLM sees the tools and can return tool_calls."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    tool_schema = _get_tool_schema()
+    tool_call_resp = _make_tool_call_response()
+
+    with patch("litellm.completion", return_value=tool_call_resp) as mock_completion:
+        result = llm.call(
+            messages=[{"role": "user", "content": "Find the name and value"}],
+            tools=[tool_schema],
+            available_functions=None,  # executor handles tool execution
+            response_model=MyOutput,
+        )
+
+    # litellm.completion must have been called (not InternalInstructor)
+    mock_completion.assert_called_once()
+    # The result should be tool_calls (a list), not a structured JSON string
+    assert isinstance(result, list), (
+        "Expected tool_calls list but got a string/model — "
+        "InternalInstructor likely intercepted the call"
+    )
+
+
+def test_non_streaming_response_model_without_tools_uses_instructor():
+    """When response_model is set but NO tools, InternalInstructor should still
+    be used for structured output conversion."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    with patch(
+        "crewai.utilities.internal_instructor.InternalInstructor"
+    ) as MockInstructor:
+        mock_instance = MagicMock()
+        mock_pydantic = MyOutput(name="Alice", value="42")
+        mock_instance.to_pydantic.return_value = mock_pydantic
+        MockInstructor.return_value = mock_instance
+
+        result = llm.call(
+            messages=[{"role": "user", "content": "Find the name and value"}],
+            tools=None,
+            response_model=MyOutput,
+        )
+
+    # InternalInstructor should have been used
+    MockInstructor.assert_called_once()
+    assert '"Alice"' in result
+
+
+def test_non_streaming_response_model_with_tools_returns_tool_calls():
+    """Verify that when LLM returns tool_calls with response_model + tools,
+    the tool_calls are returned to the caller (not swallowed)."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    tool_schema = _get_tool_schema()
+    tool_call_resp = _make_tool_call_response()
+
+    with patch("litellm.completion", return_value=tool_call_resp):
+        result = llm.call(
+            messages=[{"role": "user", "content": "Find the name and value"}],
+            tools=[tool_schema],
+            available_functions=None,
+            response_model=MyOutput,
+        )
+
+    # The result should be the raw tool_calls list
+    assert isinstance(result, list)
+    assert len(result) == 1
+    assert result[0].function.name == "my_search_tool"
+
+
+def test_non_streaming_response_model_with_tools_text_response():
+    """When LLM returns text (not tool_calls) with both response_model and tools,
+    the text response should be returned normally."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    tool_schema = _get_tool_schema()
+    text_resp = _make_text_response('{"name": "Alice", "value": "42"}')
+
+    with patch("litellm.completion", return_value=text_resp):
+        result = llm.call(
+            messages=[{"role": "user", "content": "Find the name and value"}],
+            tools=[tool_schema],
+            available_functions=None,
+            response_model=MyOutput,
+        )
+
+    # Text response should be returned as-is
+    assert isinstance(result, str)
+    assert "Alice" in result
+
+
+@pytest.mark.asyncio
+async def test_async_non_streaming_response_model_with_tools_skips_instructor():
+    """Async variant: response_model + tools should skip InternalInstructor."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    tool_schema = _get_tool_schema()
+    tool_call_resp = _make_tool_call_response()
+
+    with patch("litellm.acompletion", return_value=tool_call_resp) as mock_acompletion:
+        result = await llm.acall(
+            messages=[{"role": "user", "content": "Find the name and value"}],
+            tools=[tool_schema],
+            available_functions=None,
+            response_model=MyOutput,
+        )
+
+    mock_acompletion.assert_called_once()
+    assert isinstance(result, list), (
+        "Expected tool_calls list but got a string/model — "
+        "InternalInstructor likely intercepted the call"
+    )
+
+
+@pytest.mark.asyncio
+async def test_async_non_streaming_response_model_without_tools_uses_instructor():
+    """Async variant: response_model without tools should use InternalInstructor."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    with patch(
+        "crewai.utilities.internal_instructor.InternalInstructor"
+    ) as MockInstructor:
+        mock_instance = MagicMock()
+        mock_pydantic = MyOutput(name="Alice", value="42")
+        mock_instance.to_pydantic.return_value = mock_pydantic
+        MockInstructor.return_value = mock_instance
+
+        result = await llm.acall(
+            messages=[{"role": "user", "content": "Find the name and value"}],
+            tools=None,
+            response_model=MyOutput,
+        )
+
+    MockInstructor.assert_called_once()
+    assert '"Alice"' in result