mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-16 07:52:39 +00:00
Compare commits
2 Commits
main
...
devin/1773
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f13272e3a1 | ||
|
|
6d4fcbd7ee |
@@ -967,7 +967,43 @@ class LLM(BaseLLM):
|
||||
self._track_token_usage_internal(usage_info)
|
||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||
|
||||
if not tool_calls or not available_functions:
|
||||
# --- 8) Check accumulated_tool_args from streaming deltas
|
||||
# Streaming responses deliver tool calls via deltas accumulated in
|
||||
# accumulated_tool_args, not via the final chunk's message. When
|
||||
# available_functions is None (native tool handling), we must return
|
||||
# the accumulated tool calls so the caller (e.g., executor) can
|
||||
# handle them. When available_functions is provided, tool execution
|
||||
# already happened during the chunk processing loop via
|
||||
# _handle_streaming_tool_calls.
|
||||
if accumulated_tool_args and not available_functions:
|
||||
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
|
||||
ChatCompletionDeltaToolCall(
|
||||
index=idx,
|
||||
function=Function(
|
||||
name=tool_arg.function.name,
|
||||
arguments=tool_arg.function.arguments,
|
||||
),
|
||||
)
|
||||
for idx, tool_arg in accumulated_tool_args.items()
|
||||
if tool_arg.function.name
|
||||
]
|
||||
|
||||
if tool_calls_list:
|
||||
self._handle_emit_call_events(
|
||||
response=full_response,
|
||||
call_type=LLMCallType.LLM_CALL,
|
||||
from_task=from_task,
|
||||
from_agent=from_agent,
|
||||
messages=params["messages"],
|
||||
)
|
||||
return tool_calls_list
|
||||
|
||||
# --- 8b) If there are tool calls from last chunk but no available functions,
|
||||
# return the tool calls
|
||||
if tool_calls and not available_functions:
|
||||
return tool_calls
|
||||
|
||||
if not tool_calls and not accumulated_tool_args:
|
||||
if response_model and self.is_litellm:
|
||||
instructor_instance = InternalInstructor(
|
||||
content=full_response,
|
||||
@@ -994,10 +1030,11 @@ class LLM(BaseLLM):
|
||||
)
|
||||
return full_response
|
||||
|
||||
# --- 9) Handle tool calls if present
|
||||
tool_result = self._handle_tool_call(tool_calls, available_functions)
|
||||
if tool_result is not None:
|
||||
return tool_result
|
||||
# --- 9) Handle tool calls from last chunk if present (execute when available_functions provided)
|
||||
if tool_calls and available_functions:
|
||||
tool_result = self._handle_tool_call(tool_calls, available_functions)
|
||||
if tool_result is not None:
|
||||
return tool_result
|
||||
|
||||
# --- 10) Emit completion event and return response
|
||||
self._handle_emit_call_events(
|
||||
@@ -1234,8 +1271,17 @@ class LLM(BaseLLM):
|
||||
# --- 4) Check for tool calls
|
||||
tool_calls = getattr(response_message, "tool_calls", [])
|
||||
|
||||
# --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response
|
||||
if (not tool_calls or not available_functions) and text_response:
|
||||
# --- 5) If there are tool calls but no available functions, return the tool calls
|
||||
# This allows the caller (e.g., executor) to handle tool execution
|
||||
# This must be checked before the text response fallback because some LLMs
|
||||
# (e.g., Anthropic) return both text content and tool calls in the same response.
|
||||
# The isinstance check ensures we have actual tool call data (list), not
|
||||
# auto-generated attributes from mocks or unexpected types.
|
||||
if isinstance(tool_calls, list) and tool_calls and not available_functions:
|
||||
return tool_calls
|
||||
|
||||
# --- 6) If no tool calls or no available functions, return the text response directly as long as there is a text response
|
||||
if not tool_calls and text_response:
|
||||
self._handle_emit_call_events(
|
||||
response=text_response,
|
||||
call_type=LLMCallType.LLM_CALL,
|
||||
@@ -1245,13 +1291,8 @@ class LLM(BaseLLM):
|
||||
)
|
||||
return text_response
|
||||
|
||||
# --- 6) If there are tool calls but no available functions, return the tool calls
|
||||
# This allows the caller (e.g., executor) to handle tool execution
|
||||
if tool_calls and not available_functions:
|
||||
return tool_calls
|
||||
|
||||
# --- 7) Handle tool calls if present (execute when available_functions provided)
|
||||
if tool_calls and available_functions:
|
||||
if isinstance(tool_calls, list) and tool_calls and available_functions:
|
||||
tool_result = self._handle_tool_call(
|
||||
tool_calls, available_functions, from_task, from_agent
|
||||
)
|
||||
@@ -1364,7 +1405,16 @@ class LLM(BaseLLM):
|
||||
|
||||
tool_calls = getattr(response_message, "tool_calls", [])
|
||||
|
||||
if (not tool_calls or not available_functions) and text_response:
|
||||
# If there are tool calls but no available functions, return the tool calls
|
||||
# This allows the caller (e.g., executor) to handle tool execution
|
||||
# This must be checked before the text response fallback because some LLMs
|
||||
# (e.g., Anthropic) return both text content and tool calls in the same response.
|
||||
# The isinstance check ensures we have actual tool call data (list), not
|
||||
# auto-generated attributes from mocks or unexpected types.
|
||||
if isinstance(tool_calls, list) and tool_calls and not available_functions:
|
||||
return tool_calls
|
||||
|
||||
if not tool_calls and text_response:
|
||||
self._handle_emit_call_events(
|
||||
response=text_response,
|
||||
call_type=LLMCallType.LLM_CALL,
|
||||
@@ -1374,13 +1424,8 @@ class LLM(BaseLLM):
|
||||
)
|
||||
return text_response
|
||||
|
||||
# If there are tool calls but no available functions, return the tool calls
|
||||
# This allows the caller (e.g., executor) to handle tool execution
|
||||
if tool_calls and not available_functions:
|
||||
return tool_calls
|
||||
|
||||
# Handle tool calls if present (execute when available_functions provided)
|
||||
if tool_calls and available_functions:
|
||||
if isinstance(tool_calls, list) and tool_calls and available_functions:
|
||||
tool_result = self._handle_tool_call(
|
||||
tool_calls, available_functions, from_task, from_agent
|
||||
)
|
||||
@@ -1513,7 +1558,7 @@ class LLM(BaseLLM):
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
|
||||
if accumulated_tool_args and available_functions:
|
||||
if accumulated_tool_args:
|
||||
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
|
||||
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
|
||||
ChatCompletionDeltaToolCall(
|
||||
@@ -1527,7 +1572,14 @@ class LLM(BaseLLM):
|
||||
if tool_arg.function.name
|
||||
]
|
||||
|
||||
if tool_calls_list:
|
||||
# If there are tool calls but no available functions, return the tool calls
|
||||
# This allows the caller (e.g., executor) to handle tool execution.
|
||||
# This must be checked before the text response fallback because some LLMs
|
||||
# (e.g., Anthropic) return both text content and tool calls in the same response.
|
||||
if tool_calls_list and not available_functions:
|
||||
return tool_calls_list
|
||||
|
||||
if tool_calls_list and available_functions:
|
||||
result = self._handle_streaming_tool_calls(
|
||||
tool_calls=tool_calls_list,
|
||||
accumulated_tool_args=accumulated_tool_args,
|
||||
|
||||
@@ -614,6 +614,11 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_em
|
||||
def test_handle_streaming_tool_calls_no_available_functions(
|
||||
get_weather_tool_schema, mock_emit
|
||||
):
|
||||
"""When tools are provided but available_functions is not (defaults to None),
|
||||
the streaming handler should return the accumulated tool calls so the caller
|
||||
(e.g., CrewAgentExecutor) can handle them. This is the fix for issue #4788
|
||||
where tool calls were previously discarded and an empty string was returned.
|
||||
"""
|
||||
llm = LLM(model="openai/gpt-4o", stream=True, is_litellm=True)
|
||||
response = llm.call(
|
||||
messages=[
|
||||
@@ -621,7 +626,14 @@ def test_handle_streaming_tool_calls_no_available_functions(
|
||||
],
|
||||
tools=[get_weather_tool_schema],
|
||||
)
|
||||
assert response == ""
|
||||
# With the fix for #4788, tool calls should be returned as a list
|
||||
# instead of being discarded (previously returned "")
|
||||
assert isinstance(response, list), (
|
||||
f"Expected list of tool calls but got {type(response)}: {response}"
|
||||
)
|
||||
assert len(response) == 1
|
||||
assert response[0].function.name == "get_weather"
|
||||
assert response[0].function.arguments == '{"location":"New York, NY"}'
|
||||
|
||||
assert_event_count(
|
||||
mock_emit=mock_emit,
|
||||
@@ -1022,3 +1034,166 @@ async def test_usage_info_streaming_with_acall():
|
||||
assert llm._token_usage["total_tokens"] > 0
|
||||
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_non_streaming_tool_calls_returned_when_no_available_functions():
|
||||
"""Test that tool calls are returned (not text) when available_functions is None.
|
||||
|
||||
This reproduces the bug from issue #4788 where LLMs like Anthropic return both
|
||||
text content AND tool calls in the same response. When available_functions=None
|
||||
(as used by the executor for native tool handling), tool calls should be returned
|
||||
instead of the text content.
|
||||
"""
|
||||
from litellm.types.utils import ChatCompletionMessageToolCall, Function
|
||||
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True)
|
||||
|
||||
# Mock a response that has BOTH text content AND tool calls
|
||||
mock_tool_call = ChatCompletionMessageToolCall(
|
||||
id="call_123",
|
||||
type="function",
|
||||
function=Function(
|
||||
name="code_search",
|
||||
arguments='{"query": "test query"}',
|
||||
),
|
||||
)
|
||||
mock_message = MagicMock()
|
||||
mock_message.content = "I will search for the given query."
|
||||
mock_message.tool_calls = [mock_tool_call]
|
||||
|
||||
mock_choice = MagicMock()
|
||||
mock_choice.message = mock_message
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [mock_choice]
|
||||
mock_response.usage = MagicMock()
|
||||
mock_response.usage.prompt_tokens = 10
|
||||
mock_response.usage.completion_tokens = 5
|
||||
mock_response.usage.total_tokens = 15
|
||||
|
||||
with patch("litellm.completion", return_value=mock_response):
|
||||
# Call WITHOUT available_functions (as the executor does for native tool handling)
|
||||
result = llm.call(
|
||||
messages=[{"role": "user", "content": "Search for something"}],
|
||||
tools=[{"type": "function", "function": {"name": "code_search"}}],
|
||||
available_functions=None,
|
||||
)
|
||||
|
||||
# Result should be the tool calls list, NOT the text response
|
||||
assert isinstance(result, list), (
|
||||
f"Expected list of tool calls but got {type(result)}: {result}"
|
||||
)
|
||||
assert len(result) == 1
|
||||
assert result[0].function.name == "code_search"
|
||||
|
||||
|
||||
def test_non_streaming_text_returned_when_no_tool_calls():
|
||||
"""Test that text response is still returned when there are no tool calls."""
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True)
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.content = "The capital of France is Paris."
|
||||
mock_message.tool_calls = None
|
||||
|
||||
mock_choice = MagicMock()
|
||||
mock_choice.message = mock_message
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [mock_choice]
|
||||
mock_response.usage = MagicMock()
|
||||
mock_response.usage.prompt_tokens = 10
|
||||
mock_response.usage.completion_tokens = 5
|
||||
mock_response.usage.total_tokens = 15
|
||||
|
||||
with patch("litellm.completion", return_value=mock_response):
|
||||
result = llm.call(
|
||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||
)
|
||||
|
||||
assert isinstance(result, str)
|
||||
assert result == "The capital of France is Paris."
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_non_streaming_tool_calls_returned_when_no_available_functions():
|
||||
"""Test async path: tool calls are returned (not text) when available_functions is None.
|
||||
|
||||
Same bug as #4788 but for the async non-streaming handler.
|
||||
"""
|
||||
from litellm.types.utils import ChatCompletionMessageToolCall, Function
|
||||
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False)
|
||||
|
||||
mock_tool_call = ChatCompletionMessageToolCall(
|
||||
id="call_456",
|
||||
type="function",
|
||||
function=Function(
|
||||
name="web_search",
|
||||
arguments='{"query": "test"}',
|
||||
),
|
||||
)
|
||||
mock_message = MagicMock()
|
||||
mock_message.content = "I will search the web."
|
||||
mock_message.tool_calls = [mock_tool_call]
|
||||
|
||||
mock_choice = MagicMock()
|
||||
mock_choice.message = mock_message
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [mock_choice]
|
||||
mock_response.usage = MagicMock()
|
||||
mock_response.usage.prompt_tokens = 10
|
||||
mock_response.usage.completion_tokens = 5
|
||||
mock_response.usage.total_tokens = 15
|
||||
|
||||
with patch("litellm.acompletion", return_value=mock_response):
|
||||
result = await llm.acall(
|
||||
messages=[{"role": "user", "content": "Search for something"}],
|
||||
tools=[{"type": "function", "function": {"name": "web_search"}}],
|
||||
available_functions=None,
|
||||
)
|
||||
|
||||
assert isinstance(result, list), (
|
||||
f"Expected list of tool calls but got {type(result)}: {result}"
|
||||
)
|
||||
assert len(result) == 1
|
||||
assert result[0].function.name == "web_search"
|
||||
|
||||
|
||||
def test_non_streaming_tool_calls_executed_when_available_functions_provided():
|
||||
"""Test that tool calls are still executed when available_functions IS provided.
|
||||
|
||||
This ensures the fix doesn't break the normal tool execution path.
|
||||
"""
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True)
|
||||
|
||||
mock_tool_call = MagicMock()
|
||||
mock_tool_call.function.name = "get_weather"
|
||||
mock_tool_call.function.arguments = '{"location": "New York"}'
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.content = "I will check the weather."
|
||||
mock_message.tool_calls = [mock_tool_call]
|
||||
|
||||
mock_choice = MagicMock()
|
||||
mock_choice.message = mock_message
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [mock_choice]
|
||||
mock_response.usage = MagicMock()
|
||||
mock_response.usage.prompt_tokens = 10
|
||||
mock_response.usage.completion_tokens = 5
|
||||
mock_response.usage.total_tokens = 15
|
||||
|
||||
def get_weather(location: str) -> str:
|
||||
return f"Sunny in {location}"
|
||||
|
||||
with patch("litellm.completion", return_value=mock_response):
|
||||
result = llm.call(
|
||||
messages=[{"role": "user", "content": "What's the weather?"}],
|
||||
tools=[{"type": "function", "function": {"name": "get_weather"}}],
|
||||
available_functions={"get_weather": get_weather},
|
||||
)
|
||||
|
||||
# When available_functions is provided, the tool should be executed
|
||||
assert result == "Sunny in New York"
|
||||
|
||||
Reference in New Issue
Block a user