fix: address review feedback for streaming handlers and test robustness

- Fix sync streaming handler: check accumulated_tool_args (from deltas)
  instead of only checking tool_calls from last chunk. This mirrors the
  async streaming handler fix.
- Add isinstance(tool_calls, list) checks in non-streaming handlers for
  type safety (prevents false positives from auto-created MagicMock attributes).
- Update test_handle_streaming_tool_calls_no_available_functions to verify
  tool calls are returned (not discarded) when available_functions is None,
  matching the corrected behavior from issue #4788.
- Emit LLM completion event before returning accumulated tool calls from
  streaming handler.

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-03-09 13:57:58 +00:00
parent 6d4fcbd7ee
commit f13272e3a1
2 changed files with 56 additions and 11 deletions

View File

@@ -967,14 +967,43 @@ class LLM(BaseLLM):
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
# --- 8) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution.
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# --- 8) Check accumulated_tool_args from streaming deltas
# Streaming responses deliver tool calls via deltas accumulated in
# accumulated_tool_args, not via the final chunk's message. When
# available_functions is None (native tool handling), we must return
# the accumulated tool calls so the caller (e.g., executor) can
# handle them. When available_functions is provided, tool execution
# already happened during the chunk processing loop via
# _handle_streaming_tool_calls.
if accumulated_tool_args and not available_functions:
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
index=idx,
function=Function(
name=tool_arg.function.name,
arguments=tool_arg.function.arguments,
),
)
for idx, tool_arg in accumulated_tool_args.items()
if tool_arg.function.name
]
if tool_calls_list:
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
)
return tool_calls_list
# --- 8b) If there are tool calls from last chunk but no available functions,
# return the tool calls
if tool_calls and not available_functions:
return tool_calls
if not tool_calls:
if not tool_calls and not accumulated_tool_args:
if response_model and self.is_litellm:
instructor_instance = InternalInstructor(
content=full_response,
@@ -1001,7 +1030,7 @@ class LLM(BaseLLM):
)
return full_response
# --- 9) Handle tool calls if present (execute when available_functions provided)
# --- 9) Handle tool calls from last chunk if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
@@ -1246,7 +1275,9 @@ class LLM(BaseLLM):
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls and not available_functions:
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
# --- 6) If no tool calls or no available functions, return the text response directly as long as there is a text response
@@ -1261,7 +1292,7 @@ class LLM(BaseLLM):
return text_response
# --- 7) Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
if isinstance(tool_calls, list) and tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1378,7 +1409,9 @@ class LLM(BaseLLM):
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls and not available_functions:
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
if not tool_calls and text_response:
@@ -1392,7 +1425,7 @@ class LLM(BaseLLM):
return text_response
# Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
if isinstance(tool_calls, list) and tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)

View File

@@ -614,6 +614,11 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_em
def test_handle_streaming_tool_calls_no_available_functions(
get_weather_tool_schema, mock_emit
):
"""When tools are provided but available_functions is not (defaults to None),
the streaming handler should return the accumulated tool calls so the caller
(e.g., CrewAgentExecutor) can handle them. This is the fix for issue #4788
where tool calls were previously discarded and an empty string was returned.
"""
llm = LLM(model="openai/gpt-4o", stream=True, is_litellm=True)
response = llm.call(
messages=[
@@ -621,7 +626,14 @@ def test_handle_streaming_tool_calls_no_available_functions(
],
tools=[get_weather_tool_schema],
)
assert response == ""
# With the fix for #4788, tool calls should be returned as a list
# instead of being discarded (previously returned "")
assert isinstance(response, list), (
f"Expected list of tool calls but got {type(response)}: {response}"
)
assert len(response) == 1
assert response[0].function.name == "get_weather"
assert response[0].function.arguments == '{"location":"New York, NY"}'
assert_event_count(
mock_emit=mock_emit,