Compare commits

...

2 Commits

Author SHA1 Message Date
Devin AI
f13272e3a1 fix: address review feedback for streaming handlers and test robustness
- Fix sync streaming handler: check accumulated_tool_args (from deltas)
  instead of only checking tool_calls from last chunk. This mirrors the
  async streaming handler fix.
- Add isinstance(tool_calls, list) checks in non-streaming handlers for
  type safety (prevents false positives from auto-created MagicMock attrs).
- Update test_handle_streaming_tool_calls_no_available_functions to verify
  tool calls are returned (not discarded) when available_functions is None,
  matching the corrected behavior from issue #4788.
- Emit LLM completion event before returning accumulated tool calls from
  streaming handler.

Co-Authored-By: João <joao@crewai.com>
2026-03-09 13:57:58 +00:00
Devin AI
6d4fcbd7ee fix: prioritize tool calls over text when available_functions is None
When LLMs like Anthropic return both text content AND tool calls in
the same response, the text response was being returned instead of
the tool calls when available_functions=None. This caused the executor
to treat the text as a final answer, discarding the tool calls.

The fix reorders the priority checks in all 4 response handlers
(_handle_non_streaming_response, _ahandle_non_streaming_response,
_handle_streaming_response, _ahandle_streaming_response) so that
tool calls are returned before falling back to text content when
available_functions is None.

Fixes #4788

Co-Authored-By: João <joao@crewai.com>
2026-03-09 13:45:12 +00:00
2 changed files with 250 additions and 23 deletions

View File

@@ -967,7 +967,43 @@ class LLM(BaseLLM):
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
if not tool_calls or not available_functions:
# --- 8) Check accumulated_tool_args from streaming deltas
# Streaming responses deliver tool calls via deltas accumulated in
# accumulated_tool_args, not via the final chunk's message. When
# available_functions is None (native tool handling), we must return
# the accumulated tool calls so the caller (e.g., executor) can
# handle them. When available_functions is provided, tool execution
# already happened during the chunk processing loop via
# _handle_streaming_tool_calls.
if accumulated_tool_args and not available_functions:
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
index=idx,
function=Function(
name=tool_arg.function.name,
arguments=tool_arg.function.arguments,
),
)
for idx, tool_arg in accumulated_tool_args.items()
if tool_arg.function.name
]
if tool_calls_list:
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
)
return tool_calls_list
# --- 8b) If there are tool calls from last chunk but no available functions,
# return the tool calls
if tool_calls and not available_functions:
return tool_calls
if not tool_calls and not accumulated_tool_args:
if response_model and self.is_litellm:
instructor_instance = InternalInstructor(
content=full_response,
@@ -994,10 +1030,11 @@ class LLM(BaseLLM):
)
return full_response
# --- 9) Handle tool calls if present
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 9) Handle tool calls from last chunk if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 10) Emit completion event and return response
self._handle_emit_call_events(
@@ -1234,8 +1271,17 @@ class LLM(BaseLLM):
# --- 4) Check for tool calls
tool_calls = getattr(response_message, "tool_calls", [])
# --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
if (not tool_calls or not available_functions) and text_response:
# --- 5) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
# --- 6) If no tool calls or no available functions, return the text response directly as long as there is a text response
if not tool_calls and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1245,13 +1291,8 @@ class LLM(BaseLLM):
)
return text_response
# --- 6) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# --- 7) Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
if isinstance(tool_calls, list) and tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1364,7 +1405,16 @@ class LLM(BaseLLM):
tool_calls = getattr(response_message, "tool_calls", [])
if (not tool_calls or not available_functions) and text_response:
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
if not tool_calls and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1374,13 +1424,8 @@ class LLM(BaseLLM):
)
return text_response
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
if isinstance(tool_calls, list) and tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1513,7 +1558,7 @@ class LLM(BaseLLM):
if usage_info:
self._track_token_usage_internal(usage_info)
if accumulated_tool_args and available_functions:
if accumulated_tool_args:
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
@@ -1527,7 +1572,14 @@ class LLM(BaseLLM):
if tool_arg.function.name
]
if tool_calls_list:
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution.
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls_list and not available_functions:
return tool_calls_list
if tool_calls_list and available_functions:
result = self._handle_streaming_tool_calls(
tool_calls=tool_calls_list,
accumulated_tool_args=accumulated_tool_args,

View File

@@ -614,6 +614,11 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_em
def test_handle_streaming_tool_calls_no_available_functions(
get_weather_tool_schema, mock_emit
):
"""When tools are provided but available_functions is not (defaults to None),
the streaming handler should return the accumulated tool calls so the caller
(e.g., CrewAgentExecutor) can handle them. This is the fix for issue #4788
where tool calls were previously discarded and an empty string was returned.
"""
llm = LLM(model="openai/gpt-4o", stream=True, is_litellm=True)
response = llm.call(
messages=[
@@ -621,7 +626,14 @@ def test_handle_streaming_tool_calls_no_available_functions(
],
tools=[get_weather_tool_schema],
)
assert response == ""
# With the fix for #4788, tool calls should be returned as a list
# instead of being discarded (previously returned "")
assert isinstance(response, list), (
f"Expected list of tool calls but got {type(response)}: {response}"
)
assert len(response) == 1
assert response[0].function.name == "get_weather"
assert response[0].function.arguments == '{"location":"New York, NY"}'
assert_event_count(
mock_emit=mock_emit,
@@ -1022,3 +1034,166 @@ async def test_usage_info_streaming_with_acall():
assert llm._token_usage["total_tokens"] > 0
assert len(result) > 0
def test_non_streaming_tool_calls_returned_when_no_available_functions():
    """Verify the non-streaming handler returns tool calls, not text, when
    available_functions is None.

    Regression test for issue #4788: some providers (e.g. Anthropic) emit
    text content AND tool calls in a single response. With
    available_functions=None (the executor's native tool-handling mode), the
    tool calls must take priority over the text fallback.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall, Function

    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    # Fabricate a completion whose message carries BOTH text and a tool call.
    tool_call = ChatCompletionMessageToolCall(
        id="call_123",
        type="function",
        function=Function(
            name="code_search",
            arguments='{"query": "test query"}',
        ),
    )
    message = MagicMock()
    message.content = "I will search for the given query."
    message.tool_calls = [tool_call]

    choice = MagicMock()
    choice.message = message

    completion = MagicMock()
    completion.choices = [choice]
    completion.usage = MagicMock()
    completion.usage.prompt_tokens = 10
    completion.usage.completion_tokens = 5
    completion.usage.total_tokens = 15

    with patch("litellm.completion", return_value=completion):
        # Call without available_functions, mirroring the executor's
        # native tool-handling path.
        result = llm.call(
            messages=[{"role": "user", "content": "Search for something"}],
            tools=[{"type": "function", "function": {"name": "code_search"}}],
            available_functions=None,
        )

    # The tool-call list must come back instead of the text response.
    assert isinstance(result, list), (
        f"Expected list of tool calls but got {type(result)}: {result}"
    )
    assert len(result) == 1
    assert result[0].function.name == "code_search"
def test_non_streaming_text_returned_when_no_tool_calls():
    """A plain text response (no tool calls) must still come back as a string."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    # Text-only message: tool_calls is explicitly None.
    message = MagicMock()
    message.content = "The capital of France is Paris."
    message.tool_calls = None

    choice = MagicMock()
    choice.message = message

    completion = MagicMock()
    completion.choices = [choice]
    completion.usage = MagicMock()
    completion.usage.prompt_tokens = 10
    completion.usage.completion_tokens = 5
    completion.usage.total_tokens = 15

    with patch("litellm.completion", return_value=completion):
        result = llm.call(
            messages=[{"role": "user", "content": "What is the capital of France?"}],
        )

    assert isinstance(result, str)
    assert result == "The capital of France is Paris."
@pytest.mark.asyncio
async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
    """Async counterpart of the #4788 regression: acall() must return the
    tool calls (not the text content) when available_functions is None.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall, Function

    llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)

    # Completion carrying both text and a tool call, as Anthropic-style
    # providers can produce.
    tool_call = ChatCompletionMessageToolCall(
        id="call_456",
        type="function",
        function=Function(
            name="web_search",
            arguments='{"query": "test"}',
        ),
    )
    message = MagicMock()
    message.content = "I will search the web."
    message.tool_calls = [tool_call]

    choice = MagicMock()
    choice.message = message

    completion = MagicMock()
    completion.choices = [choice]
    completion.usage = MagicMock()
    completion.usage.prompt_tokens = 10
    completion.usage.completion_tokens = 5
    completion.usage.total_tokens = 15

    with patch("litellm.acompletion", return_value=completion):
        result = await llm.acall(
            messages=[{"role": "user", "content": "Search for something"}],
            tools=[{"type": "function", "function": {"name": "web_search"}}],
            available_functions=None,
        )

    assert isinstance(result, list), (
        f"Expected list of tool calls but got {type(result)}: {result}"
    )
    assert len(result) == 1
    assert result[0].function.name == "web_search"
def test_non_streaming_tool_calls_executed_when_available_functions_provided():
"""Test that tool calls are still executed when available_functions IS provided.
This ensures the fix doesn't break the normal tool execution path.
"""
llm = LLM(model="gpt-4o-mini", is_litellm=True)
mock_tool_call = MagicMock()
mock_tool_call.function.name = "get_weather"
mock_tool_call.function.arguments = '{"location": "New York"}'
mock_message = MagicMock()
mock_message.content = "I will check the weather."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
def get_weather(location: str) -> str:
return f"Sunny in {location}"
with patch("litellm.completion", return_value=mock_response):
result = llm.call(
messages=[{"role": "user", "content": "What's the weather?"}],
tools=[{"type": "function", "function": {"name": "get_weather"}}],
available_functions={"get_weather": get_weather},
)
# When available_functions is provided, the tool should be executed
assert result == "Sunny in New York"