fix: prioritize tool calls over text when available_functions is None

When LLM providers such as Anthropic return both text content AND tool
calls in the same response, the text content was returned instead of
the tool calls whenever available_functions=None. The executor then
treated that text as a final answer and discarded the tool calls.

The fix reorders the priority checks in all 4 response handlers
(_handle_non_streaming_response, _ahandle_non_streaming_response,
_handle_streaming_response, _ahandle_streaming_response) so that
tool calls are returned before falling back to text content when
available_functions is None.

Fixes #4788

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-03-09 13:45:12 +00:00
parent cd42bcf035
commit 6d4fcbd7ee
2 changed files with 202 additions and 20 deletions

View File

@@ -967,7 +967,14 @@ class LLM(BaseLLM):
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
if not tool_calls or not available_functions:
# --- 8) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution.
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls and not available_functions:
return tool_calls
if not tool_calls:
if response_model and self.is_litellm:
instructor_instance = InternalInstructor(
content=full_response,
@@ -994,10 +1001,11 @@ class LLM(BaseLLM):
)
return full_response
# --- 9) Handle tool calls if present
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 9) Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 10) Emit completion event and return response
self._handle_emit_call_events(
@@ -1234,8 +1242,15 @@ class LLM(BaseLLM):
# --- 4) Check for tool calls
tool_calls = getattr(response_message, "tool_calls", [])
# --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
if (not tool_calls or not available_functions) and text_response:
# --- 5) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls and not available_functions:
return tool_calls
# --- 6) If there are no tool calls, return the text response directly as long as there is a text response
if not tool_calls and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1245,11 +1260,6 @@ class LLM(BaseLLM):
)
return text_response
# --- 6) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# --- 7) Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(
@@ -1364,7 +1374,14 @@ class LLM(BaseLLM):
tool_calls = getattr(response_message, "tool_calls", [])
if (not tool_calls or not available_functions) and text_response:
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls and not available_functions:
return tool_calls
if not tool_calls and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1374,11 +1391,6 @@ class LLM(BaseLLM):
)
return text_response
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(
@@ -1513,7 +1525,7 @@ class LLM(BaseLLM):
if usage_info:
self._track_token_usage_internal(usage_info)
if accumulated_tool_args and available_functions:
if accumulated_tool_args:
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
@@ -1527,7 +1539,14 @@ class LLM(BaseLLM):
if tool_arg.function.name
]
if tool_calls_list:
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution.
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls_list and not available_functions:
return tool_calls_list
if tool_calls_list and available_functions:
result = self._handle_streaming_tool_calls(
tool_calls=tool_calls_list,
accumulated_tool_args=accumulated_tool_args,

View File

@@ -1022,3 +1022,166 @@ async def test_usage_info_streaming_with_acall():
assert llm._token_usage["total_tokens"] > 0
assert len(result) > 0
def test_non_streaming_tool_calls_returned_when_no_available_functions():
    """Tool calls take priority over text when ``available_functions`` is None.

    Regression test for issue #4788: a single response may carry both text
    content and tool calls. With ``available_functions=None`` the call must
    hand back the tool calls rather than the accompanying text.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall, Function

    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    # Build a fake completion whose message has BOTH text and one tool call.
    search_call = ChatCompletionMessageToolCall(
        id="call_123",
        type="function",
        function=Function(
            name="code_search",
            arguments='{"query": "test query"}',
        ),
    )
    message = MagicMock(
        content="I will search for the given query.",
        tool_calls=[search_call],
    )
    completion = MagicMock(
        choices=[MagicMock(message=message)],
        usage=MagicMock(prompt_tokens=10, completion_tokens=5, total_tokens=15),
    )

    with patch("litellm.completion", return_value=completion):
        # No available_functions is passed, so nothing is executed locally.
        result = llm.call(
            messages=[{"role": "user", "content": "Search for something"}],
            tools=[{"type": "function", "function": {"name": "code_search"}}],
            available_functions=None,
        )

    # The tool-call list must come back, not the text content.
    assert isinstance(result, list), (
        f"Expected list of tool calls but got {type(result)}: {result}"
    )
    assert len(result) == 1
    assert result[0].function.name == "code_search"
def test_non_streaming_text_returned_when_no_tool_calls():
    """A response without tool calls still yields its plain text content."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    # Fake completion: text only, tool_calls explicitly None.
    message = MagicMock(
        content="The capital of France is Paris.",
        tool_calls=None,
    )
    completion = MagicMock(
        choices=[MagicMock(message=message)],
        usage=MagicMock(prompt_tokens=10, completion_tokens=5, total_tokens=15),
    )

    with patch("litellm.completion", return_value=completion):
        result = llm.call(
            messages=[{"role": "user", "content": "What is the capital of France?"}],
        )

    assert isinstance(result, str)
    assert result == "The capital of France is Paris."
@pytest.mark.asyncio
async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
    """Async variant: tool calls are returned instead of text when
    ``available_functions`` is None.

    Covers the same scenario as issue #4788 through ``acall`` with
    streaming disabled.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall, Function

    llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)

    # Fake completion whose message carries text plus a single tool call.
    web_call = ChatCompletionMessageToolCall(
        id="call_456",
        type="function",
        function=Function(
            name="web_search",
            arguments='{"query": "test"}',
        ),
    )
    message = MagicMock(
        content="I will search the web.",
        tool_calls=[web_call],
    )
    completion = MagicMock(
        choices=[MagicMock(message=message)],
        usage=MagicMock(prompt_tokens=10, completion_tokens=5, total_tokens=15),
    )

    with patch("litellm.acompletion", return_value=completion):
        result = await llm.acall(
            messages=[{"role": "user", "content": "Search for something"}],
            tools=[{"type": "function", "function": {"name": "web_search"}}],
            available_functions=None,
        )

    assert isinstance(result, list), (
        f"Expected list of tool calls but got {type(result)}: {result}"
    )
    assert len(result) == 1
    assert result[0].function.name == "web_search"
def test_non_streaming_tool_calls_executed_when_available_functions_provided():
    """Tool calls are still executed when ``available_functions`` is supplied.

    Guards the normal execution path: with a matching callable available,
    the call result is the tool's return value.
    """

    def get_weather(location: str) -> str:
        return f"Sunny in {location}"

    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    # `name` is assigned after construction: passing name= to MagicMock()
    # would name the mock itself instead of setting the attribute.
    weather_call = MagicMock()
    weather_call.function.name = "get_weather"
    weather_call.function.arguments = '{"location": "New York"}'

    message = MagicMock(
        content="I will check the weather.",
        tool_calls=[weather_call],
    )
    completion = MagicMock(
        choices=[MagicMock(message=message)],
        usage=MagicMock(prompt_tokens=10, completion_tokens=5, total_tokens=15),
    )

    with patch("litellm.completion", return_value=completion):
        result = llm.call(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[{"type": "function", "function": {"name": "get_weather"}}],
            available_functions={"get_weather": get_weather},
        )

    # With available_functions provided, the tool itself runs.
    assert result == "Sunny in New York"