From 6d4fcbd7ee1833ee311d10ee386cc37898054030 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 9 Mar 2026 13:45:12 +0000
Subject: [PATCH] fix: prioritize tool calls over text when available_functions
 is None
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a provider such as Anthropic returned both text content AND tool
calls in the same response, the text was returned instead of the tool
calls if available_functions=None. This caused the executor to treat
the text as a final answer, discarding the tool calls.

The fix reorders the priority checks in all 4 response handlers
(_handle_non_streaming_response, _ahandle_non_streaming_response,
_handle_streaming_response, _ahandle_streaming_response) so that
tool calls are returned before falling back to text content when
available_functions is None.

Fixes #4788

Co-Authored-By: João <joao@crewai.com>
---
 lib/crewai/src/crewai/llm.py |  59 ++++++++-----
 lib/crewai/tests/test_llm.py | 163 +++++++++++++++++++++++++++++++++++
 2 files changed, 202 insertions(+), 20 deletions(-)

diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 8a4ac2edd..35b268ae2 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -967,7 +967,14 @@ class LLM(BaseLLM):
                 self._track_token_usage_internal(usage_info)
             self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
 
-            if not tool_calls or not available_functions:
+            # --- 8) If there are tool calls but no available functions, return the tool calls
+            # This allows the caller (e.g., executor) to handle tool execution.
+            # This must be checked before the text response fallback because some LLMs
+            # (e.g., Anthropic) return both text content and tool calls in the same response.
+            if tool_calls and not available_functions:
+                return tool_calls
+
+            if not tool_calls:
                 if response_model and self.is_litellm:
                     instructor_instance = InternalInstructor(
                         content=full_response,
@@ -994,10 +1001,11 @@ class LLM(BaseLLM):
                 )
                 return full_response
 
-            # --- 9) Handle tool calls if present
-            tool_result = self._handle_tool_call(tool_calls, available_functions)
-            if tool_result is not None:
-                return tool_result
+            # --- 9) Handle tool calls if present (execute when available_functions provided)
+            if tool_calls and available_functions:
+                tool_result = self._handle_tool_call(tool_calls, available_functions)
+                if tool_result is not None:
+                    return tool_result
 
             # --- 10) Emit completion event and return response
             self._handle_emit_call_events(
@@ -1234,8 +1242,15 @@ class LLM(BaseLLM):
         # --- 4) Check for tool calls
         tool_calls = getattr(response_message, "tool_calls", [])
 
-        # --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
-        if (not tool_calls or not available_functions) and text_response:
+        # --- 5) If there are tool calls but no available functions, return the tool calls
+        # This allows the caller (e.g., executor) to handle tool execution.
+        # This must be checked before the text response fallback because some LLMs
+        # (e.g., Anthropic) return both text content and tool calls in the same response.
+        if tool_calls and not available_functions:
+            return tool_calls
+
+        # --- 6) If there are no tool calls, return the text response directly as long as there is a text response
+        if not tool_calls and text_response:
             self._handle_emit_call_events(
                 response=text_response,
                 call_type=LLMCallType.LLM_CALL,
@@ -1245,11 +1260,6 @@ class LLM(BaseLLM):
             )
             return text_response
 
-        # --- 6) If there are tool calls but no available functions, return the tool calls
-        # This allows the caller (e.g., executor) to handle tool execution
-        if tool_calls and not available_functions:
-            return tool_calls
-
         # --- 7) Handle tool calls if present (execute when available_functions provided)
         if tool_calls and available_functions:
             tool_result = self._handle_tool_call(
@@ -1364,7 +1374,14 @@ class LLM(BaseLLM):
 
         tool_calls = getattr(response_message, "tool_calls", [])
 
-        if (not tool_calls or not available_functions) and text_response:
+        # If there are tool calls but no available functions, return the tool calls
+        # This allows the caller (e.g., executor) to handle tool execution.
+        # This must be checked before the text response fallback because some LLMs
+        # (e.g., Anthropic) return both text content and tool calls in the same response.
+        if tool_calls and not available_functions:
+            return tool_calls
+
+        if not tool_calls and text_response:
             self._handle_emit_call_events(
                 response=text_response,
                 call_type=LLMCallType.LLM_CALL,
@@ -1374,11 +1391,6 @@ class LLM(BaseLLM):
             )
             return text_response
 
-        # If there are tool calls but no available functions, return the tool calls
-        # This allows the caller (e.g., executor) to handle tool execution
-        if tool_calls and not available_functions:
-            return tool_calls
-
         # Handle tool calls if present (execute when available_functions provided)
         if tool_calls and available_functions:
             tool_result = self._handle_tool_call(
@@ -1513,7 +1525,7 @@ class LLM(BaseLLM):
             if usage_info:
                 self._track_token_usage_internal(usage_info)
 
-            if accumulated_tool_args and available_functions:
+            if accumulated_tool_args:
                 # Convert accumulated tool args to ChatCompletionDeltaToolCall objects
                 tool_calls_list: list[ChatCompletionDeltaToolCall] = [
                     ChatCompletionDeltaToolCall(
@@ -1527,7 +1539,14 @@ class LLM(BaseLLM):
                     if tool_arg.function.name
                 ]
 
-                if tool_calls_list:
+                # If there are tool calls but no available functions, return the tool calls
+                # This allows the caller (e.g., executor) to handle tool execution.
+                # This must be checked before the text response fallback because some LLMs
+                # (e.g., Anthropic) return both text content and tool calls in the same response.
+                if tool_calls_list and not available_functions:
+                    return tool_calls_list
+
+                if tool_calls_list and available_functions:
                     result = self._handle_streaming_tool_calls(
                         tool_calls=tool_calls_list,
                         accumulated_tool_args=accumulated_tool_args,
diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py
index 71cb69790..43f991e90 100644
--- a/lib/crewai/tests/test_llm.py
+++ b/lib/crewai/tests/test_llm.py
@@ -1022,3 +1022,166 @@ async def test_usage_info_streaming_with_acall():
     assert llm._token_usage["total_tokens"] > 0
 
     assert len(result) > 0
+
+
+def test_non_streaming_tool_calls_returned_when_no_available_functions():
+    """Test that tool calls are returned (not text) when available_functions is None.
+
+    This reproduces the bug from issue #4788 where LLMs like Anthropic return both
+    text content AND tool calls in the same response. When available_functions=None
+    (as used by the executor for native tool handling), tool calls should be returned
+    instead of the text content.
+    """
+    from litellm.types.utils import ChatCompletionMessageToolCall, Function
+
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    # Mock a response that has BOTH text content AND tool calls
+    mock_tool_call = ChatCompletionMessageToolCall(
+        id="call_123",
+        type="function",
+        function=Function(
+            name="code_search",
+            arguments='{"query": "test query"}',
+        ),
+    )
+    mock_message = MagicMock()
+    mock_message.content = "I will search for the given query."
+    mock_message.tool_calls = [mock_tool_call]
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    with patch("litellm.completion", return_value=mock_response):
+        # Call WITHOUT available_functions (as the executor does for native tool handling)
+        result = llm.call(
+            messages=[{"role": "user", "content": "Search for something"}],
+            tools=[{"type": "function", "function": {"name": "code_search"}}],
+            available_functions=None,
+        )
+
+    # Result should be the tool calls list, NOT the text response
+    assert isinstance(result, list), (
+        f"Expected list of tool calls but got {type(result)}: {result}"
+    )
+    assert len(result) == 1
+    assert result[0].function.name == "code_search"
+
+
+def test_non_streaming_text_returned_when_no_tool_calls():
+    """Test that text response is still returned when there are no tool calls."""
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    mock_message = MagicMock()
+    mock_message.content = "The capital of France is Paris."
+    mock_message.tool_calls = None
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    with patch("litellm.completion", return_value=mock_response):
+        result = llm.call(
+            messages=[{"role": "user", "content": "What is the capital of France?"}],
+        )
+
+    assert isinstance(result, str)
+    assert result == "The capital of France is Paris."
+
+
+@pytest.mark.asyncio
+async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
+    """Test async path: tool calls are returned (not text) when available_functions is None.
+
+    Same bug as #4788 but for the async non-streaming handler.
+    """
+    from litellm.types.utils import ChatCompletionMessageToolCall, Function
+
+    llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)
+
+    mock_tool_call = ChatCompletionMessageToolCall(
+        id="call_456",
+        type="function",
+        function=Function(
+            name="web_search",
+            arguments='{"query": "test"}',
+        ),
+    )
+    mock_message = MagicMock()
+    mock_message.content = "I will search the web."
+    mock_message.tool_calls = [mock_tool_call]
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    with patch("litellm.acompletion", return_value=mock_response):
+        result = await llm.acall(
+            messages=[{"role": "user", "content": "Search for something"}],
+            tools=[{"type": "function", "function": {"name": "web_search"}}],
+            available_functions=None,
+        )
+
+    assert isinstance(result, list), (
+        f"Expected list of tool calls but got {type(result)}: {result}"
+    )
+    assert len(result) == 1
+    assert result[0].function.name == "web_search"
+
+
+def test_non_streaming_tool_calls_executed_when_available_functions_provided():
+    """Test that tool calls are still executed when available_functions IS provided.
+
+    This ensures the fix doesn't break the normal tool execution path.
+    """
+    llm = LLM(model="gpt-4o-mini", is_litellm=True)
+
+    mock_tool_call = MagicMock()
+    mock_tool_call.function.name = "get_weather"
+    mock_tool_call.function.arguments = '{"location": "New York"}'
+
+    mock_message = MagicMock()
+    mock_message.content = "I will check the weather."
+    mock_message.tool_calls = [mock_tool_call]
+
+    mock_choice = MagicMock()
+    mock_choice.message = mock_message
+
+    mock_response = MagicMock()
+    mock_response.choices = [mock_choice]
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.usage.total_tokens = 15
+
+    def get_weather(location: str) -> str:
+        return f"Sunny in {location}"
+
+    with patch("litellm.completion", return_value=mock_response):
+        result = llm.call(
+            messages=[{"role": "user", "content": "What's the weather?"}],
+            tools=[{"type": "function", "function": {"name": "get_weather"}}],
+            available_functions={"get_weather": get_weather},
+        )
+
+    # When available_functions is provided, the tool should be executed
+    assert result == "Sunny in New York"