Compare commits

...

2 Commits

Author SHA1 Message Date
Devin AI
f13272e3a1 fix: address review feedback for streaming handlers and test robustness
- Fix sync streaming handler: check accumulated_tool_args (from deltas)
  instead of only checking tool_calls from last chunk. This mirrors the
  async streaming handler fix.
- Add isinstance(tool_calls, list) checks in non-streaming handlers for
  type safety (prevents false positives from auto-created MagicMock attrs).
- Update test_handle_streaming_tool_calls_no_available_functions to verify
  tool calls are returned (not discarded) when available_functions is None,
  matching the corrected behavior from issue #4788.
- Emit LLM completion event before returning accumulated tool calls from
  streaming handler.

Co-Authored-By: João <joao@crewai.com>
2026-03-09 13:57:58 +00:00
Devin AI
6d4fcbd7ee fix: prioritize tool calls over text when available_functions is None
When LLMs like Anthropic return both text content AND tool calls in
the same response, the text response was being returned instead of
the tool calls when available_functions=None. This caused the executor
to treat the text as a final answer, discarding the tool calls.

The fix reorders the priority checks in all 4 response handlers
(_handle_non_streaming_response, _ahandle_non_streaming_response,
_handle_streaming_response, _ahandle_streaming_response) so that
tool calls are returned before falling back to text content when
available_functions is None.

Fixes #4788

Co-Authored-By: João <joao@crewai.com>
2026-03-09 13:45:12 +00:00
2 changed files with 250 additions and 23 deletions

View File

@@ -967,7 +967,43 @@ class LLM(BaseLLM):
self._track_token_usage_internal(usage_info)
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
if not tool_calls or not available_functions:
# --- 8) Check accumulated_tool_args from streaming deltas
# Streaming responses deliver tool calls via deltas accumulated in
# accumulated_tool_args, not via the final chunk's message. When
# available_functions is None (native tool handling), we must return
# the accumulated tool calls so the caller (e.g., executor) can
# handle them. When available_functions is provided, tool execution
# already happened during the chunk processing loop via
# _handle_streaming_tool_calls.
if accumulated_tool_args and not available_functions:
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
index=idx,
function=Function(
name=tool_arg.function.name,
arguments=tool_arg.function.arguments,
),
)
for idx, tool_arg in accumulated_tool_args.items()
if tool_arg.function.name
]
if tool_calls_list:
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
)
return tool_calls_list
# --- 8b) If there are tool calls from last chunk but no available functions,
# return the tool calls
if tool_calls and not available_functions:
return tool_calls
if not tool_calls and not accumulated_tool_args:
if response_model and self.is_litellm:
instructor_instance = InternalInstructor(
content=full_response,
@@ -994,10 +1030,11 @@ class LLM(BaseLLM):
)
return full_response
# --- 9) Handle tool calls if present
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 9) Handle tool calls from last chunk if present (execute when available_functions provided)
if tool_calls and available_functions:
tool_result = self._handle_tool_call(tool_calls, available_functions)
if tool_result is not None:
return tool_result
# --- 10) Emit completion event and return response
self._handle_emit_call_events(
@@ -1234,8 +1271,17 @@ class LLM(BaseLLM):
# --- 4) Check for tool calls
tool_calls = getattr(response_message, "tool_calls", [])
# --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
if (not tool_calls or not available_functions) and text_response:
# --- 5) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
# --- 6) If no tool calls or no available functions, return the text response directly as long as there is a text response
if not tool_calls and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1245,13 +1291,8 @@ class LLM(BaseLLM):
)
return text_response
# --- 6) If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# --- 7) Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
if isinstance(tool_calls, list) and tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1364,7 +1405,16 @@ class LLM(BaseLLM):
tool_calls = getattr(response_message, "tool_calls", [])
if (not tool_calls or not available_functions) and text_response:
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
# The isinstance check ensures we have actual tool call data (list), not
# auto-generated attributes from mocks or unexpected types.
if isinstance(tool_calls, list) and tool_calls and not available_functions:
return tool_calls
if not tool_calls and text_response:
self._handle_emit_call_events(
response=text_response,
call_type=LLMCallType.LLM_CALL,
@@ -1374,13 +1424,8 @@ class LLM(BaseLLM):
)
return text_response
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution
if tool_calls and not available_functions:
return tool_calls
# Handle tool calls if present (execute when available_functions provided)
if tool_calls and available_functions:
if isinstance(tool_calls, list) and tool_calls and available_functions:
tool_result = self._handle_tool_call(
tool_calls, available_functions, from_task, from_agent
)
@@ -1513,7 +1558,7 @@ class LLM(BaseLLM):
if usage_info:
self._track_token_usage_internal(usage_info)
if accumulated_tool_args and available_functions:
if accumulated_tool_args:
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
ChatCompletionDeltaToolCall(
@@ -1527,7 +1572,14 @@ class LLM(BaseLLM):
if tool_arg.function.name
]
if tool_calls_list:
# If there are tool calls but no available functions, return the tool calls
# This allows the caller (e.g., executor) to handle tool execution.
# This must be checked before the text response fallback because some LLMs
# (e.g., Anthropic) return both text content and tool calls in the same response.
if tool_calls_list and not available_functions:
return tool_calls_list
if tool_calls_list and available_functions:
result = self._handle_streaming_tool_calls(
tool_calls=tool_calls_list,
accumulated_tool_args=accumulated_tool_args,

View File

@@ -614,6 +614,11 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_em
def test_handle_streaming_tool_calls_no_available_functions(
get_weather_tool_schema, mock_emit
):
"""When tools are provided but available_functions is not (defaults to None),
the streaming handler should return the accumulated tool calls so the caller
(e.g., CrewAgentExecutor) can handle them. This is the fix for issue #4788
where tool calls were previously discarded and an empty string was returned.
"""
llm = LLM(model="openai/gpt-4o", stream=True, is_litellm=True)
response = llm.call(
messages=[
@@ -621,7 +626,14 @@ def test_handle_streaming_tool_calls_no_available_functions(
],
tools=[get_weather_tool_schema],
)
assert response == ""
# With the fix for #4788, tool calls should be returned as a list
# instead of being discarded (previously returned "")
assert isinstance(response, list), (
f"Expected list of tool calls but got {type(response)}: {response}"
)
assert len(response) == 1
assert response[0].function.name == "get_weather"
assert response[0].function.arguments == '{"location":"New York, NY"}'
assert_event_count(
mock_emit=mock_emit,
@@ -1022,3 +1034,166 @@ async def test_usage_info_streaming_with_acall():
assert llm._token_usage["total_tokens"] > 0
assert len(result) > 0
def test_non_streaming_tool_calls_returned_when_no_available_functions():
    """Verify the non-streaming handler returns tool calls, not text, when
    available_functions is None.

    Regression test for issue #4788: some providers (e.g. Anthropic) emit
    text content AND tool calls in a single response. With
    available_functions=None (the executor's native tool-handling mode), the
    tool calls must take priority over the text fallback.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall, Function

    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    # Fabricate a completion whose message carries BOTH text and a tool call.
    tool_call = ChatCompletionMessageToolCall(
        id="call_123",
        type="function",
        function=Function(
            name="code_search",
            arguments='{"query": "test query"}',
        ),
    )
    message = MagicMock()
    message.content = "I will search for the given query."
    message.tool_calls = [tool_call]

    choice = MagicMock()
    choice.message = message

    completion = MagicMock()
    completion.choices = [choice]
    completion.usage = MagicMock()
    completion.usage.prompt_tokens = 10
    completion.usage.completion_tokens = 5
    completion.usage.total_tokens = 15

    with patch("litellm.completion", return_value=completion):
        # Call without available_functions, mirroring the executor's
        # native tool-handling path.
        result = llm.call(
            messages=[{"role": "user", "content": "Search for something"}],
            tools=[{"type": "function", "function": {"name": "code_search"}}],
            available_functions=None,
        )

    # The tool-call list must come back instead of the text response.
    assert isinstance(result, list), (
        f"Expected list of tool calls but got {type(result)}: {result}"
    )
    assert len(result) == 1
    assert result[0].function.name == "code_search"
def test_non_streaming_text_returned_when_no_tool_calls():
    """A plain text response (no tool calls) must still come back as a string."""
    llm = LLM(model="gpt-4o-mini", is_litellm=True)

    # Text-only message: tool_calls is explicitly None.
    message = MagicMock()
    message.content = "The capital of France is Paris."
    message.tool_calls = None

    choice = MagicMock()
    choice.message = message

    completion = MagicMock()
    completion.choices = [choice]
    completion.usage = MagicMock()
    completion.usage.prompt_tokens = 10
    completion.usage.completion_tokens = 5
    completion.usage.total_tokens = 15

    with patch("litellm.completion", return_value=completion):
        result = llm.call(
            messages=[{"role": "user", "content": "What is the capital of France?"}],
        )

    assert isinstance(result, str)
    assert result == "The capital of France is Paris."
@pytest.mark.asyncio
async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
    """Async counterpart of the #4788 regression: acall() must return the
    tool calls (not the text content) when available_functions is None.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall, Function

    llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)

    # Completion carrying both text and a tool call, as Anthropic-style
    # providers can produce.
    tool_call = ChatCompletionMessageToolCall(
        id="call_456",
        type="function",
        function=Function(
            name="web_search",
            arguments='{"query": "test"}',
        ),
    )
    message = MagicMock()
    message.content = "I will search the web."
    message.tool_calls = [tool_call]

    choice = MagicMock()
    choice.message = message

    completion = MagicMock()
    completion.choices = [choice]
    completion.usage = MagicMock()
    completion.usage.prompt_tokens = 10
    completion.usage.completion_tokens = 5
    completion.usage.total_tokens = 15

    with patch("litellm.acompletion", return_value=completion):
        result = await llm.acall(
            messages=[{"role": "user", "content": "Search for something"}],
            tools=[{"type": "function", "function": {"name": "web_search"}}],
            available_functions=None,
        )

    assert isinstance(result, list), (
        f"Expected list of tool calls but got {type(result)}: {result}"
    )
    assert len(result) == 1
    assert result[0].function.name == "web_search"
def test_non_streaming_tool_calls_executed_when_available_functions_provided():
"""Test that tool calls are still executed when available_functions IS provided.
This ensures the fix doesn't break the normal tool execution path.
"""
llm = LLM(model="gpt-4o-mini", is_litellm=True)
mock_tool_call = MagicMock()
mock_tool_call.function.name = "get_weather"
mock_tool_call.function.arguments = '{"location": "New York"}'
mock_message = MagicMock()
mock_message.content = "I will check the weather."
mock_message.tool_calls = [mock_tool_call]
mock_choice = MagicMock()
mock_choice.message = mock_message
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
def get_weather(location: str) -> str:
return f"Sunny in {location}"
with patch("litellm.completion", return_value=mock_response):
result = llm.call(
messages=[{"role": "user", "content": "What's the weather?"}],
tools=[{"type": "function", "function": {"name": "get_weather"}}],
available_functions={"get_weather": get_weather},
)
# When available_functions is provided, the tool should be executed
assert result == "Sunny in New York"