Compare commits

...

3 Commits

Author SHA1 Message Date
Devin AI
4747a8263a fix: streaming handler returns tool_calls when available_functions is None
Mirrors _handle_non_streaming_response's explicit path: when tool_calls
exist but available_functions is None, return the tool_calls to the caller
instead of returning an empty full_response string.

Co-Authored-By: João <joao@crewai.com>
2026-03-04 12:54:33 +00:00
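A minimal sketch of the behavior this commit describes, assuming full_response, tool_calls, and available_functions are already in scope at the end of the streaming handler (the function name below is illustrative, not the actual crewAI method):

    # Illustrative sketch, not the crewAI implementation.
    def finish_streaming(full_response, tool_calls, available_functions):
        if tool_calls and not available_functions:
            # The caller (e.g. the agent executor) owns tool execution,
            # so hand back the tool calls instead of an empty string.
            return tool_calls
        return full_response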
Devin AI
992321f679 fix: also skip response_model passthrough to litellm when tools are present
Addresses review feedback: when tools are present, response_model must not
be passed to litellm.completion/acompletion either, because litellm uses
instructor internally which would override the tools parameter.

Added 2 more tests that explicitly verify response_model is NOT in the
kwargs sent to litellm when tools are present.

Co-Authored-By: João <joao@crewai.com>
2026-03-04 12:48:53 +00:00
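In practice the rule reads roughly as below; params is the kwargs dict handed to LiteLLM, and has_tools mirrors the check added in the diff further down (a sketch, not the verbatim source):

    # Sketch: forward response_model to litellm only when no tools are present;
    # otherwise litellm's internal instructor would override the tools parameter.
    has_tools = bool(params.get("tools"))
    if response_model and not has_tools:
        params["response_model"] = response_model
    response = litellm.completion(**params)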
Devin AI
10384152d3 fix: prevent tools from being discarded when response_model is set (#4697)
When both native function calling (tools) and output_pydantic (response_model)
are enabled, InternalInstructor was intercepting the LLM call and creating its
own completion without passing tools. This caused agent tools to be silently
discarded.

The fix checks whether tools are present in params before routing to
InternalInstructor. When tools exist, the normal litellm.completion path is
used so the LLM can see and call the agent's tools. InternalInstructor is
still used when no tools are present (backward compatible).

Applied to all three response handlers:
- _handle_non_streaming_response
- _ahandle_non_streaming_response
- _handle_streaming_response

Co-Authored-By: João <joao@crewai.com>
2026-03-04 12:36:59 +00:00
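Condensed, the routing gate described in the commit above looks roughly like this in each handler; the two helper calls are placeholders for the existing InternalInstructor path and the normal litellm completion path, not real method names:

    # Sketch of the routing decision (helper names are hypothetical).
    has_tools = bool(params.get("tools"))
    if response_model and self.is_litellm and not has_tools:
        # No tools: InternalInstructor can own the call and return structured output.
        return structured_output_via_instructor(params, response_model)
    # Tools present (or no response_model): normal completion path, so the
    # model can see and call the agent's tools.
    return normal_completion(params)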
2 changed files with 281 additions and 5 deletions

View File

@@ -967,8 +967,17 @@ class LLM(BaseLLM):
        self._track_token_usage_internal(usage_info)
        self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

        # If there are tool calls but no available functions, return them
        # so the caller (e.g., executor) can handle tool execution.
        # This mirrors _handle_non_streaming_response's explicit path.
        if tool_calls and not available_functions:
            return tool_calls

        if not tool_calls or not available_functions:
            if response_model and self.is_litellm:
            # Only use InternalInstructor for structured output when there
            # are no tools — otherwise tools would be silently discarded.
            has_tools = bool(params.get("tools"))
            if response_model and self.is_litellm and not has_tools:
                instructor_instance = InternalInstructor(
                    content=full_response,
                    model=response_model,
@@ -1150,7 +1159,10 @@
            str: The response text
        """
        # --- 1) Handle response_model with InternalInstructor for LiteLLM
        if response_model and self.is_litellm:
        # Skip InternalInstructor when tools are present so the LLM can
        # see and call the agent's tools before returning structured output.
        has_tools = bool(params.get("tools"))
        if response_model and self.is_litellm and not has_tools:
            from crewai.utilities.internal_instructor import InternalInstructor

            messages = params.get("messages", [])
@@ -1183,7 +1195,11 @@
        # and convert them to our own exception type for consistent handling
        # across the codebase. This allows CrewAgentExecutor to handle context
        # length issues appropriately.
        if response_model:
        # Only pass response_model to litellm when there are no tools.
        # When tools are present, litellm's internal instructor would override
        # the tools parameter, so we let the normal completion flow handle it
        # and defer structured output conversion to the executor/converter.
        if response_model and not has_tools:
            params["response_model"] = response_model

        response = litellm.completion(**params)
@@ -1290,7 +1306,10 @@
        Returns:
            str: The response text
        """
        if response_model and self.is_litellm:
        # Skip InternalInstructor when tools are present so the LLM can
        # see and call the agent's tools before returning structured output.
        has_tools = bool(params.get("tools"))
        if response_model and self.is_litellm and not has_tools:
            from crewai.utilities.internal_instructor import InternalInstructor

            messages = params.get("messages", [])
@@ -1318,7 +1337,11 @@
            return structured_response

        try:
            if response_model:
            # Only pass response_model to litellm when there are no tools.
            # When tools are present, litellm's internal instructor would override
            # the tools parameter, so we let the normal completion flow handle it
            # and defer structured output conversion to the executor/converter.
            if response_model and not has_tools:
                params["response_model"] = response_model

            response = await litellm.acompletion(**params)
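For callers, the practical effect is that structured output conversion happens after tool use rather than inside the LLM call. A hedged caller-side sketch of that deferred flow; run_tools is a hypothetical placeholder (not a crewAI API), and MyOutput stands in for whatever response_model the caller passes (the tests below define one such model):

    # Illustrative only: what a caller does with the tool_calls it now receives.
    result = llm.call(
        messages=messages,
        tools=tools,
        available_functions=None,
        response_model=MyOutput,
    )
    if isinstance(result, list):
        # Tool calls came back; a (hypothetical) executor step runs them and
        # feeds the results back before asking for the final answer.
        tool_messages = run_tools(result)  # placeholder, not a crewAI API
        final_text = llm.call(messages=messages + tool_messages)
    else:
        final_text = result
    structured = MyOutput.model_validate_json(final_text)  # deferred conversion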

View File

@@ -1022,3 +1022,256 @@ async def test_usage_info_streaming_with_acall():
    assert llm._token_usage["total_tokens"] > 0
    assert len(result) > 0


# --- Tests for issue #4697: tools discarded when response_model is set ---


class MyOutput(BaseModel):
    name: str
    value: str


def _make_tool_call_response():
    """Create a mock litellm response that contains a tool call."""
    mock_message = MagicMock()
    mock_message.content = ""
    mock_tool_call = MagicMock()
    mock_tool_call.function.name = "my_search_tool"
    mock_tool_call.function.arguments = '{"query": "test"}'
    mock_tool_call.id = "call_123"
    mock_message.tool_calls = [mock_tool_call]
    mock_choice = MagicMock()
    mock_choice.message = mock_message
    mock_response = MagicMock()
    mock_response.choices = [mock_choice]
    mock_response.usage = MagicMock()
    mock_response.usage.prompt_tokens = 10
    mock_response.usage.completion_tokens = 5
    mock_response.usage.total_tokens = 15
    return mock_response


def _make_text_response(text: str = '{"name": "Alice", "value": "42"}'):
    """Create a mock litellm response that contains only text content."""
    mock_message = MagicMock()
    mock_message.content = text
    mock_message.tool_calls = []
    mock_choice = MagicMock()
    mock_choice.message = mock_message
    mock_response = MagicMock()
    mock_response.choices = [mock_choice]
    mock_response.usage = MagicMock()
    mock_response.usage.prompt_tokens = 10
    mock_response.usage.completion_tokens = 5
    mock_response.usage.total_tokens = 15
    return mock_response


def _get_tool_schema():
    return {
        "type": "function",
        "function": {
            "name": "my_search_tool",
            "description": "Search for data",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "The search query"}
                },
                "required": ["query"],
            },
        },
    }


def test_non_streaming_response_model_with_tools_skips_instructor():
    """When response_model AND tools are both present, InternalInstructor must NOT
    be used so that LiteLLM sees the tools and can return tool_calls."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    tool_schema = _get_tool_schema()
    tool_call_resp = _make_tool_call_response()

    with patch("litellm.completion", return_value=tool_call_resp) as mock_completion:
        result = llm.call(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=[tool_schema],
            available_functions=None,  # executor handles tool execution
            response_model=MyOutput,
        )

        # litellm.completion must have been called (not InternalInstructor)
        mock_completion.assert_called_once()
        # The result should be tool_calls (a list), not a structured JSON string
        assert isinstance(result, list), (
            "Expected tool_calls list but got a string/model — "
            "InternalInstructor likely intercepted the call"
        )


def test_non_streaming_response_model_without_tools_uses_instructor():
    """When response_model is set but NO tools, InternalInstructor should still
    be used for structured output conversion."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    with patch(
        "crewai.utilities.internal_instructor.InternalInstructor"
    ) as MockInstructor:
        mock_instance = MagicMock()
        mock_pydantic = MyOutput(name="Alice", value="42")
        mock_instance.to_pydantic.return_value = mock_pydantic
        MockInstructor.return_value = mock_instance

        result = llm.call(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=None,
            response_model=MyOutput,
        )

        # InternalInstructor should have been used
        MockInstructor.assert_called_once()
        assert '"Alice"' in result


def test_non_streaming_response_model_with_tools_returns_tool_calls():
    """Verify that when LLM returns tool_calls with response_model + tools,
    the tool_calls are returned to the caller (not swallowed)."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    tool_schema = _get_tool_schema()
    tool_call_resp = _make_tool_call_response()

    with patch("litellm.completion", return_value=tool_call_resp):
        result = llm.call(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=[tool_schema],
            available_functions=None,
            response_model=MyOutput,
        )

        # The result should be the raw tool_calls list
        assert isinstance(result, list)
        assert len(result) == 1
        assert result[0].function.name == "my_search_tool"


def test_non_streaming_response_model_with_tools_text_response():
    """When LLM returns text (not tool_calls) with both response_model and tools,
    the text response should be returned normally."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    tool_schema = _get_tool_schema()
    text_resp = _make_text_response('{"name": "Alice", "value": "42"}')

    with patch("litellm.completion", return_value=text_resp):
        result = llm.call(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=[tool_schema],
            available_functions=None,
            response_model=MyOutput,
        )

        # Text response should be returned as-is
        assert isinstance(result, str)
        assert "Alice" in result


@pytest.mark.asyncio
async def test_async_non_streaming_response_model_with_tools_skips_instructor():
    """Async variant: response_model + tools should skip InternalInstructor."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    tool_schema = _get_tool_schema()
    tool_call_resp = _make_tool_call_response()

    with patch("litellm.acompletion", return_value=tool_call_resp) as mock_acompletion:
        result = await llm.acall(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=[tool_schema],
            available_functions=None,
            response_model=MyOutput,
        )

        mock_acompletion.assert_called_once()
        assert isinstance(result, list), (
            "Expected tool_calls list but got a string/model — "
            "InternalInstructor likely intercepted the call"
        )


@pytest.mark.asyncio
async def test_async_non_streaming_response_model_without_tools_uses_instructor():
    """Async variant: response_model without tools should use InternalInstructor."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    with patch(
        "crewai.utilities.internal_instructor.InternalInstructor"
    ) as MockInstructor:
        mock_instance = MagicMock()
        mock_pydantic = MyOutput(name="Alice", value="42")
        mock_instance.to_pydantic.return_value = mock_pydantic
        MockInstructor.return_value = mock_instance

        result = await llm.acall(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=None,
            response_model=MyOutput,
        )

        MockInstructor.assert_called_once()
        assert '"Alice"' in result


def test_non_streaming_response_model_not_passed_to_litellm_when_tools_present():
    """Verify that response_model is NOT forwarded to litellm.completion when
    tools are present, because litellm's internal instructor would override
    the tools parameter."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    tool_schema = _get_tool_schema()
    tool_call_resp = _make_tool_call_response()

    with patch("litellm.completion", return_value=tool_call_resp) as mock_completion:
        llm.call(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=[tool_schema],
            available_functions=None,
            response_model=MyOutput,
        )

        mock_completion.assert_called_once()
        call_kwargs = mock_completion.call_args[1]
        # response_model must NOT be in the kwargs sent to litellm.completion
        assert "response_model" not in call_kwargs, (
            "response_model was passed to litellm.completion even though tools are present; "
            "litellm's internal instructor would override the tools"
        )
        # tools MUST be present
        assert "tools" in call_kwargs


@pytest.mark.asyncio
async def test_async_response_model_not_passed_to_litellm_when_tools_present():
    """Async variant: response_model must not be forwarded to litellm.acompletion
    when tools are present."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    tool_schema = _get_tool_schema()
    tool_call_resp = _make_tool_call_response()

    with patch("litellm.acompletion", return_value=tool_call_resp) as mock_acompletion:
        await llm.acall(
            messages=[{"role": "user", "content": "Find the name and value"}],
            tools=[tool_schema],
            available_functions=None,
            response_model=MyOutput,
        )

        mock_acompletion.assert_called_once()
        call_kwargs = mock_acompletion.call_args[1]
        assert "response_model" not in call_kwargs, (
            "response_model was passed to litellm.acompletion even though tools are present; "
            "litellm's internal instructor would override the tools"
        )
        assert "tools" in call_kwargs