From 6d4fcbd7ee1833ee311d10ee386cc37898054030 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:45:12 +0000 Subject: [PATCH] fix: prioritize tool calls over text when available_functions is None MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When LLMs like Anthropic return both text content AND tool calls in the same response, the text response was being returned instead of the tool calls when available_functions=None. This caused the executor to treat the text as a final answer, discarding the tool calls. The fix reorders the priority checks in all 4 response handlers (_handle_non_streaming_response, _ahandle_non_streaming_response, _handle_streaming_response, _ahandle_streaming_response) so that tool calls are returned before falling back to text content when available_functions is None. Fixes #4788 Co-Authored-By: João --- lib/crewai/src/crewai/llm.py | 59 ++++++++----- lib/crewai/tests/test_llm.py | 163 +++++++++++++++++++++++++++++++++++ 2 files changed, 202 insertions(+), 20 deletions(-) diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 8a4ac2edd..35b268ae2 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -967,7 +967,14 @@ class LLM(BaseLLM): self._track_token_usage_internal(usage_info) self._handle_streaming_callbacks(callbacks, usage_info, last_chunk) - if not tool_calls or not available_functions: + # --- 8) If there are tool calls but no available functions, return the tool calls + # This allows the caller (e.g., executor) to handle tool execution. + # This must be checked before the text response fallback because some LLMs + # (e.g., Anthropic) return both text content and tool calls in the same response. 
+ if tool_calls and not available_functions: + return tool_calls + + if not tool_calls: if response_model and self.is_litellm: instructor_instance = InternalInstructor( content=full_response, @@ -994,10 +1001,11 @@ class LLM(BaseLLM): ) return full_response - # --- 9) Handle tool calls if present - tool_result = self._handle_tool_call(tool_calls, available_functions) - if tool_result is not None: - return tool_result + # --- 9) Handle tool calls if present (execute when available_functions provided) + if tool_calls and available_functions: + tool_result = self._handle_tool_call(tool_calls, available_functions) + if tool_result is not None: + return tool_result # --- 10) Emit completion event and return response self._handle_emit_call_events( @@ -1234,8 +1242,15 @@ class LLM(BaseLLM): # --- 4) Check for tool calls tool_calls = getattr(response_message, "tool_calls", []) - # --- 5) If no tool calls or no available functions, return the text response directly as long as there is a text response - if (not tool_calls or not available_functions) and text_response: + # --- 5) If there are tool calls but no available functions, return the tool calls + # This allows the caller (e.g., executor) to handle tool execution + # This must be checked before the text response fallback because some LLMs + # (e.g., Anthropic) return both text content and tool calls in the same response. 
+ if tool_calls and not available_functions: + return tool_calls + + # --- 6) If there are no tool calls, return the text response directly as long as there is a text response + if not tool_calls and text_response: self._handle_emit_call_events( response=text_response, call_type=LLMCallType.LLM_CALL, @@ -1245,11 +1260,6 @@ class LLM(BaseLLM): ) return text_response - # --- 6) If there are tool calls but no available functions, return the tool calls - # This allows the caller (e.g., executor) to handle tool execution - if tool_calls and not available_functions: - return tool_calls - # --- 7) Handle tool calls if present (execute when available_functions provided) if tool_calls and available_functions: tool_result = self._handle_tool_call( @@ -1364,7 +1374,14 @@ class LLM(BaseLLM): tool_calls = getattr(response_message, "tool_calls", []) - if (not tool_calls or not available_functions) and text_response: + # If there are tool calls but no available functions, return the tool calls + # This allows the caller (e.g., executor) to handle tool execution + # This must be checked before the text response fallback because some LLMs + # (e.g., Anthropic) return both text content and tool calls in the same response. 
+ if tool_calls and not available_functions: + return tool_calls + + if not tool_calls and text_response: self._handle_emit_call_events( response=text_response, call_type=LLMCallType.LLM_CALL, @@ -1374,11 +1391,6 @@ class LLM(BaseLLM): ) return text_response - # If there are tool calls but no available functions, return the tool calls - # This allows the caller (e.g., executor) to handle tool execution - if tool_calls and not available_functions: - return tool_calls - # Handle tool calls if present (execute when available_functions provided) if tool_calls and available_functions: tool_result = self._handle_tool_call( @@ -1513,7 +1525,7 @@ class LLM(BaseLLM): if usage_info: self._track_token_usage_internal(usage_info) - if accumulated_tool_args and available_functions: + if accumulated_tool_args: # Convert accumulated tool args to ChatCompletionDeltaToolCall objects tool_calls_list: list[ChatCompletionDeltaToolCall] = [ ChatCompletionDeltaToolCall( @@ -1527,7 +1539,14 @@ class LLM(BaseLLM): if tool_arg.function.name ] - if tool_calls_list: + # If there are tool calls but no available functions, return the tool calls + # This allows the caller (e.g., executor) to handle tool execution. + # This must be checked before the text response fallback because some LLMs + # (e.g., Anthropic) return both text content and tool calls in the same response. 
+ if tool_calls_list and not available_functions: + return tool_calls_list + + if tool_calls_list and available_functions: result = self._handle_streaming_tool_calls( tool_calls=tool_calls_list, accumulated_tool_args=accumulated_tool_args, diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py index 71cb69790..43f991e90 100644 --- a/lib/crewai/tests/test_llm.py +++ b/lib/crewai/tests/test_llm.py @@ -1022,3 +1022,166 @@ async def test_usage_info_streaming_with_acall(): assert llm._token_usage["total_tokens"] > 0 assert len(result) > 0 + + +def test_non_streaming_tool_calls_returned_when_no_available_functions(): + """Test that tool calls are returned (not text) when available_functions is None. + + This reproduces the bug from issue #4788 where LLMs like Anthropic return both + text content AND tool calls in the same response. When available_functions=None + (as used by the executor for native tool handling), tool calls should be returned + instead of the text content. + """ + from litellm.types.utils import ChatCompletionMessageToolCall, Function + + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + # Mock a response that has BOTH text content AND tool calls + mock_tool_call = ChatCompletionMessageToolCall( + id="call_123", + type="function", + function=Function( + name="code_search", + arguments='{"query": "test query"}', + ), + ) + mock_message = MagicMock() + mock_message.content = "I will search for the given query." 
+ mock_message.tool_calls = [mock_tool_call] + + mock_choice = MagicMock() + mock_choice.message = mock_message + + mock_response = MagicMock() + mock_response.choices = [mock_choice] + mock_response.usage = MagicMock() + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 5 + mock_response.usage.total_tokens = 15 + + with patch("litellm.completion", return_value=mock_response): + # Call WITHOUT available_functions (as the executor does for native tool handling) + result = llm.call( + messages=[{"role": "user", "content": "Search for something"}], + tools=[{"type": "function", "function": {"name": "code_search"}}], + available_functions=None, + ) + + # Result should be the tool calls list, NOT the text response + assert isinstance(result, list), ( + f"Expected list of tool calls but got {type(result)}: {result}" + ) + assert len(result) == 1 + assert result[0].function.name == "code_search" + + +def test_non_streaming_text_returned_when_no_tool_calls(): + """Test that text response is still returned when there are no tool calls.""" + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + mock_message = MagicMock() + mock_message.content = "The capital of France is Paris." + mock_message.tool_calls = None + + mock_choice = MagicMock() + mock_choice.message = mock_message + + mock_response = MagicMock() + mock_response.choices = [mock_choice] + mock_response.usage = MagicMock() + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 5 + mock_response.usage.total_tokens = 15 + + with patch("litellm.completion", return_value=mock_response): + result = llm.call( + messages=[{"role": "user", "content": "What is the capital of France?"}], + ) + + assert isinstance(result, str) + assert result == "The capital of France is Paris." 
+ + +@pytest.mark.asyncio +async def test_async_non_streaming_tool_calls_returned_when_no_available_functions(): + """Test async path: tool calls are returned (not text) when available_functions is None. + + Same bug as #4788 but for the async non-streaming handler. + """ + from litellm.types.utils import ChatCompletionMessageToolCall, Function + + llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=False) + + mock_tool_call = ChatCompletionMessageToolCall( + id="call_456", + type="function", + function=Function( + name="web_search", + arguments='{"query": "test"}', + ), + ) + mock_message = MagicMock() + mock_message.content = "I will search the web." + mock_message.tool_calls = [mock_tool_call] + + mock_choice = MagicMock() + mock_choice.message = mock_message + + mock_response = MagicMock() + mock_response.choices = [mock_choice] + mock_response.usage = MagicMock() + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 5 + mock_response.usage.total_tokens = 15 + + with patch("litellm.acompletion", return_value=mock_response): + result = await llm.acall( + messages=[{"role": "user", "content": "Search for something"}], + tools=[{"type": "function", "function": {"name": "web_search"}}], + available_functions=None, + ) + + assert isinstance(result, list), ( + f"Expected list of tool calls but got {type(result)}: {result}" + ) + assert len(result) == 1 + assert result[0].function.name == "web_search" + + +def test_non_streaming_tool_calls_executed_when_available_functions_provided(): + """Test that tool calls are still executed when available_functions IS provided. + + This ensures the fix doesn't break the normal tool execution path. + """ + llm = LLM(model="gpt-4o-mini", is_litellm=True) + + mock_tool_call = MagicMock() + mock_tool_call.function.name = "get_weather" + mock_tool_call.function.arguments = '{"location": "New York"}' + + mock_message = MagicMock() + mock_message.content = "I will check the weather." 
+ mock_message.tool_calls = [mock_tool_call] + + mock_choice = MagicMock() + mock_choice.message = mock_message + + mock_response = MagicMock() + mock_response.choices = [mock_choice] + mock_response.usage = MagicMock() + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 5 + mock_response.usage.total_tokens = 15 + + def get_weather(location: str) -> str: + return f"Sunny in {location}" + + with patch("litellm.completion", return_value=mock_response): + result = llm.call( + messages=[{"role": "user", "content": "What's the weather?"}], + tools=[{"type": "function", "function": {"name": "get_weather"}}], + available_functions={"get_weather": get_weather}, + ) + + # When available_functions is provided, the tool should be executed + assert result == "Sunny in New York"