fix: prioritize tool calls over text when available_functions is None

When LLMs like Anthropic return both text content AND tool calls in the same response, the text response was being returned instead of the tool calls when available_functions=None. This caused the executor to treat the text as a final answer, discarding the tool calls. The fix reorders the priority checks in all 4 response handlers (_handle_non_streaming_response, _ahandle_non_streaming_response, _handle_streaming_response, _ahandle_streaming_response) so that tool calls are returned before falling back to text content when available_functions is None. Fixes #4788 Co-Authored-By: João <joao@crewai.com>
2026-07-03 06:08:15 +00:00 · 2026-03-09 13:45:12 +00:00
parent cd42bcf035
commit 6d4fcbd7ee
2 changed files with 202 additions and 20 deletions
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -967,7 +967,14 @@ class LLM(BaseLLM):
                self._track_token_usage_internal(usage_info)
            self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)

-            if not tool_calls or not available_functions:
+            # --- 8) If there are tool calls but no available functions, return the tool calls
+            # This allows the caller (e.g., executor) to handle tool execution.
+            # This must be checked before the text response fallback because some LLMs
+            # (e.g., Anthropic) return both text content and tool calls in the same response.
+            if tool_calls and not available_functions:
+                return tool_calls
+
+            if not tool_calls:
                if response_model and self.is_litellm:
                    instructor_instance = InternalInstructor(
                        content=full_response,
@@ -994,10 +1001,11 @@ class LLM(BaseLLM):
                )
                return full_response

-            # --- 9) Handle tool calls if present
-            tool_result = self._handle_tool_call(tool_calls, available_functions)
-            if tool_result is not None:
-                return tool_result
+            # --- 9) Handle tool calls if present (execute when available_functions provided)
+            if tool_calls and available_functions:
+                tool_result = self._handle_tool_call(tool_calls, available_functions)
+                if tool_result is not None:
+                    return tool_result

            # --- 10) Emit completion event and return response
            self._handle_emit_call_events(
@@ -1234,8 +1242,15 @@ class LLM(BaseLLM):
        # --- 4) Check for tool calls
        tool_calls = getattr(response_message, "tool_calls", [])

-        # --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
-        if (not tool_calls or not available_functions) and text_response:
+        # --- 5) If there are tool calls but no available functions, return the tool calls
+        # This allows the caller (e.g., executor) to handle tool execution
+        # This must be checked before the text response fallback because some LLMs
+        # (e.g., Anthropic) return both text content and tool calls in the same response.
+        if tool_calls and not available_functions:
+            return tool_calls
+
+        # --- 6) If no tool calls or no available functions, return the text response directly as long as there is a text response
+        if not tool_calls and text_response:
            self._handle_emit_call_events(
                response=text_response,
                call_type=LLMCallType.LLM_CALL,
@@ -1245,11 +1260,6 @@ class LLM(BaseLLM):
            )
            return text_response

-        # --- 6) If there are tool calls but no available functions, return the tool calls
-        # This allows the caller (e.g., executor) to handle tool execution
-        if tool_calls and not available_functions:
-            return tool_calls
-
        # --- 7) Handle tool calls if present (execute when available_functions provided)
        if tool_calls and available_functions:
            tool_result = self._handle_tool_call(
@@ -1364,7 +1374,14 @@ class LLM(BaseLLM):

        tool_calls = getattr(response_message, "tool_calls", [])

-        if (not tool_calls or not available_functions) and text_response:
+        # If there are tool calls but no available functions, return the tool calls
+        # This allows the caller (e.g., executor) to handle tool execution
+        # This must be checked before the text response fallback because some LLMs
+        # (e.g., Anthropic) return both text content and tool calls in the same response.
+        if tool_calls and not available_functions:
+            return tool_calls
+
+        if not tool_calls and text_response:
            self._handle_emit_call_events(
                response=text_response,
                call_type=LLMCallType.LLM_CALL,
@@ -1374,11 +1391,6 @@ class LLM(BaseLLM):
            )
            return text_response

-        # If there are tool calls but no available functions, return the tool calls
-        # This allows the caller (e.g., executor) to handle tool execution
-        if tool_calls and not available_functions:
-            return tool_calls
-
        # Handle tool calls if present (execute when available_functions provided)
        if tool_calls and available_functions:
            tool_result = self._handle_tool_call(
@@ -1513,7 +1525,7 @@ class LLM(BaseLLM):
            if usage_info:
                self._track_token_usage_internal(usage_info)

-            if accumulated_tool_args and available_functions:
+            if accumulated_tool_args:
                # Convert accumulated tool args to ChatCompletionDeltaToolCall objects
                tool_calls_list: list[ChatCompletionDeltaToolCall] = [
                    ChatCompletionDeltaToolCall(
@@ -1527,7 +1539,14 @@ class LLM(BaseLLM):
                    if tool_arg.function.name
                ]

-                if tool_calls_list:
+                # If there are tool calls but no available functions, return the tool calls
+                # This allows the caller (e.g., executor) to handle tool execution.
+                # This must be checked before the text response fallback because some LLMs
+                # (e.g., Anthropic) return both text content and tool calls in the same response.
+                if tool_calls_list and not available_functions:
+                    return tool_calls_list
+
+                if tool_calls_list and available_functions:
                    result = self._handle_streaming_tool_calls(
                        tool_calls=tool_calls_list,
                        accumulated_tool_args=accumulated_tool_args,
--- a/lib/crewai/tests/test_llm.py
+++ b/lib/crewai/tests/test_llm.py
@@ -1022,3 +1022,166 @@ async def test_usage_info_streaming_with_acall():
    assert llm._token_usage["total_tokens"] > 0

    assert len(result) > 0
+
+
+def test_non_streaming_tool_calls_returned_when_no_available_functions():
+    """Test that tool calls are returned (not text) when available_functions is None.
+
+    This reproduces the bug from issue #4788 where LLMs like Anthropic return both
+    text content AND tool calls in the same response. When available_functions=None
+    (as used by the executor for native tool handling), tool calls should be returned
+    instead of the text content.
+    """
+    from litellm.types.utils import ChatCompletionMessageToolCall, Function
+
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    # Mock a response that has BOTH text content AND tool calls
+    mock_tool_call = ChatCompletionMessageToolCall(
+        id="call_123",
+        type="function",
+        function=Function(
+            name="code_search",
+            arguments='{"query": "test query"}',
+        ),
+    )
+    mock_message = MagicMock()
+    mock_message.content = "I will search for the given query."
+    mock_message.tool_calls = [mock_tool_call]
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    with patch("litellm.completion", return_value=mock_response):
+        # Call WITHOUT available_functions (as the executor does for native tool handling)
+        result = llm.call(
+            messages=[{"role": "user", "content": "Search for something"}],
+            tools=[{"type": "function", "function": {"name": "code_search"}}],
+            available_functions=None,
+        )
+
+    # Result should be the tool calls list, NOT the text response
+    assert isinstance(result, list), (
+        f"Expected list of tool calls but got {type(result)}: {result}"
+    )
+    assert len(result) == 1
+    assert result[0].function.name == "code_search"
+
+
+def test_non_streaming_text_returned_when_no_tool_calls():
+    """Test that text response is still returned when there are no tool calls."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    mock_message = MagicMock()
+    mock_message.content = "The capital of France is Paris."
+    mock_message.tool_calls = None
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    with patch("litellm.completion", return_value=mock_response):
+        result = llm.call(
+            messages=[{"role": "user", "content": "What is the capital of France?"}],
+        )
+
+    assert isinstance(result, str)
+    assert result == "The capital of France is Paris."
+
+
+@pytest.mark.asyncio
+async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
+    """Test async path: tool calls are returned (not text) when available_functions is None.
+
+    Same bug as #4788 but for the async non-streaming handler.
+    """
+    from litellm.types.utils import ChatCompletionMessageToolCall, Function
+
+    llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)
+
+    mock_tool_call = ChatCompletionMessageToolCall(
+        id="call_456",
+        type="function",
+        function=Function(
+            name="web_search",
+            arguments='{"query": "test"}',
+        ),
+    )
+    mock_message = MagicMock()
+    mock_message.content = "I will search the web."
+    mock_message.tool_calls = [mock_tool_call]
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    with patch("litellm.acompletion", return_value=mock_response):
+        result = await llm.acall(
+            messages=[{"role": "user", "content": "Search for something"}],
+            tools=[{"type": "function", "function": {"name": "web_search"}}],
+            available_functions=None,
+        )
+
+    assert isinstance(result, list), (
+        f"Expected list of tool calls but got {type(result)}: {result}"
+    )
+    assert len(result) == 1
+    assert result[0].function.name == "web_search"
+
+
+def test_non_streaming_tool_calls_executed_when_available_functions_provided():
+    """Test that tool calls are still executed when available_functions IS provided.
+
+    This ensures the fix doesn't break the normal tool execution path.
+    """
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    mock_tool_call = MagicMock()
+    mock_tool_call.function.name = "get_weather"
+    mock_tool_call.function.arguments = '{"location": "New York"}'
+
+    mock_message = MagicMock()
+    mock_message.content = "I will check the weather."
+    mock_message.tool_calls = [mock_tool_call]
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    def get_weather(location: str) -> str:
+        return f"Sunny in {location}"
+
+    with patch("litellm.completion", return_value=mock_response):
+        result = llm.call(
+            messages=[{"role": "user", "content": "What's the weather?"}],
+            tools=[{"type": "function", "function": {"name": "get_weather"}}],
+            available_functions={"get_weather": get_weather},
+        )
+
+    # When available_functions is provided, the tool should be executed
+    assert result == "Sunny in New York"