diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 815dfe763..657488098 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -290,7 +290,7 @@ class AnthropicCompletion(BaseLLM): # Prepare completion parameters completion_params = self._prepare_completion_params( - formatted_messages, system_message, tools + formatted_messages, system_message, tools, available_functions ) effective_response_model = response_model or self.response_format @@ -361,7 +361,7 @@ class AnthropicCompletion(BaseLLM): ) completion_params = self._prepare_completion_params( - formatted_messages, system_message, tools + formatted_messages, system_message, tools, available_functions ) effective_response_model = response_model or self.response_format @@ -396,6 +396,7 @@ class AnthropicCompletion(BaseLLM): messages: list[LLMMessage], system_message: str | None = None, tools: list[dict[str, Any]] | None = None, + available_functions: dict[str, Any] | None = None, ) -> dict[str, Any]: """Prepare parameters for Anthropic messages API. @@ -403,6 +404,8 @@ class AnthropicCompletion(BaseLLM): messages: Formatted messages for Anthropic system_message: Extracted system message tools: Tool definitions + available_functions: Available functions for tool calling. When provided + with a single tool, tool_choice is automatically set to force tool use. 
def _execute_first_tool(
    self,
    tool_uses: list[ToolUseBlock | BetaToolUseBlock],
    available_functions: dict[str, Any],
    from_task: Any | None = None,
    from_agent: Any | None = None,
) -> Any | None:
    """Execute the first tool-use block and return that tool's result.

    Used when ``available_functions`` is provided so the tool output is
    returned directly instead of being fed back to the model in a follow-up
    conversation turn (matching OpenAI behavior for use cases like
    reasoning_handler).

    Only the first entry of ``tool_uses`` is executed; any further tool-use
    blocks in the same response are ignored. The helper is synchronous, so
    it can be called from both the sync and async response handlers.

    Args:
        tool_uses: Tool use blocks extracted from Claude's response.
        available_functions: Mapping of tool name to callable.
        from_task: Task that initiated the call.
        from_agent: Agent that initiated the call.

    Returns:
        Whatever ``_handle_tool_execution`` returns for the first tool, or
        ``None`` when ``tool_uses`` is empty. Callers treat ``None`` as
        "no tool result" and fall through to regular response handling.
    """
    # Guard the index below: an empty list must not raise IndexError, it
    # should behave like "nothing was executed".
    if not tool_uses:
        return None

    first_tool_use = tool_uses[0]

    return self._handle_tool_execution(
        function_name=first_tool_use.name,
        # The SDK types ``input`` as a generic object; tool arguments are
        # passed along as a plain keyword dictionary.
        function_args=cast(dict[str, Any], first_tool_use.input),
        available_functions=available_functions,
        from_task=from_task,
        from_agent=from_agent,
    )
000000000..96e4b687a --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_tool_execution_returns_tool_result_directly.yaml @@ -0,0 +1,102 @@ +interactions: +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Calculate 5 + + 3 using the simple_calculator tool with operation ''add''."}],"model":"claude-3-5-haiku-20241022","stream":false,"tool_choice":{"type":"tool","name":"simple_calculator"},"tools":[{"name":"simple_calculator","description":"Perform + simple math operations","input_schema":{"type":"object","properties":{"operation":{"type":"string","enum":["add","multiply"],"description":"The + operation to perform"},"a":{"type":"integer","description":"First number"},"b":{"type":"integer","description":"Second + number"}},"required":["operation","a","b"]}}]}' + headers: + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '608' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - X-USER-AGENT-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 
'{"model":"claude-3-5-haiku-20241022","id":"msg_01Q2F83aAeqqTCxsd8WpZjK7","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01BW4XkHnhRVM5JZsvoaQKw5","name":"simple_calculator","input":{"operation":"add","a":5,"b":3}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":498,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":67,"service_tier":"standard"}}' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - application/json + Date: + - Tue, 03 Feb 2026 23:26:35 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-requests-limit: + - '4000' + anthropic-ratelimit-requests-remaining: + - '3999' + anthropic-ratelimit-requests-reset: + - '2026-02-03T23:26:34Z' + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '1228' + status: + code: 200 + message: OK +version: 
1 diff --git a/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_tool_execution_with_available_functions.yaml b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_tool_execution_with_available_functions.yaml new file mode 100644 index 000000000..78638ca0b --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_tool_execution_with_available_functions.yaml @@ -0,0 +1,108 @@ +interactions: +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Create a simple + plan to say hello. Use the create_reasoning_plan tool."}],"model":"claude-3-5-haiku-20241022","stream":false,"tool_choice":{"type":"tool","name":"create_reasoning_plan"},"tools":[{"name":"create_reasoning_plan","description":"Create + a structured reasoning plan for completing a task","input_schema":{"type":"object","properties":{"plan":{"type":"string","description":"High-level + plan description"},"steps":{"type":"array","items":{"type":"object"},"description":"List + of steps to execute"},"ready":{"type":"boolean","description":"Whether the plan + is ready to execute"}},"required":["plan","steps","ready"]}}]}' + headers: + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '684' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - X-USER-AGENT-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 
'{"model":"claude-3-5-haiku-20241022","id":"msg_01HLuGgGRFseMdhTYAhkKtfz","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01GQAUFHffGzMd3ufA6YRMZF","name":"create_reasoning_plan","input":{"plan":"Say + hello in a friendly and straightforward manner","steps":[{"description":"Take + a deep breath","action":"Pause and relax"},{"description":"Smile","action":"Prepare + a warm facial expression"},{"description":"Greet the person","action":"Say + ''Hello!''"},{"description":"Wait for response","action":"Listen and be ready + to continue conversation"}],"ready":true}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":513,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":162,"service_tier":"standard"}}' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - application/json + Date: + - Tue, 03 Feb 2026 23:26:38 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-requests-limit: + - '4000' + anthropic-ratelimit-requests-remaining: + - '3999' + anthropic-ratelimit-requests-reset: + - '2026-02-03T23:26:35Z' + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + 
anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '2994' + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/llms/anthropic/test_anthropic.py b/lib/crewai/tests/llms/anthropic/test_anthropic.py index 8d4b69e20..c5ad5f273 100644 --- a/lib/crewai/tests/llms/anthropic/test_anthropic.py +++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py @@ -45,85 +45,6 @@ def test_anthropic_completion_is_used_when_claude_provider(): -def test_anthropic_tool_use_conversation_flow(): - """ - Test that the Anthropic completion properly handles tool use conversation flow - """ - from unittest.mock import Mock, patch - from crewai.llms.providers.anthropic.completion import AnthropicCompletion - from anthropic.types.tool_use_block import ToolUseBlock - - # Create AnthropicCompletion instance - completion = AnthropicCompletion(model="claude-3-5-sonnet-20241022") - - # Mock tool function - def mock_weather_tool(location: str) -> str: - return f"The weather in {location} is sunny and 75°F" - - available_functions = {"get_weather": mock_weather_tool} - - # Mock the Anthropic client responses - with patch.object(completion.client.messages, 'create') as mock_create: - # Mock initial response with tool use - need to properly mock ToolUseBlock - mock_tool_use = Mock(spec=ToolUseBlock) - mock_tool_use.type = "tool_use" - mock_tool_use.id = "tool_123" - mock_tool_use.name = "get_weather" - mock_tool_use.input = {"location": "San Francisco"} - - mock_initial_response = Mock() - mock_initial_response.content = [mock_tool_use] - mock_initial_response.usage = Mock() - mock_initial_response.usage.input_tokens = 100 - mock_initial_response.usage.output_tokens = 50 - - # Mock final response after tool result - properly mock text 
content - mock_text_block = Mock() - mock_text_block.type = "text" - # Set the text attribute as a string, not another Mock - mock_text_block.configure_mock(text="Based on the weather data, it's a beautiful day in San Francisco with sunny skies and 75°F temperature.") - - mock_final_response = Mock() - mock_final_response.content = [mock_text_block] - mock_final_response.usage = Mock() - mock_final_response.usage.input_tokens = 150 - mock_final_response.usage.output_tokens = 75 - - # Configure mock to return different responses on successive calls - mock_create.side_effect = [mock_initial_response, mock_final_response] - - # Test the call - messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}] - result = completion.call( - messages=messages, - available_functions=available_functions - ) - - # Verify the result contains the final response - assert "beautiful day in San Francisco" in result - assert "sunny skies" in result - assert "75°F" in result - - # Verify that two API calls were made (initial + follow-up) - assert mock_create.call_count == 2 - - # Verify the second call includes tool results - second_call_args = mock_create.call_args_list[1][1] # kwargs of second call - messages_in_second_call = second_call_args["messages"] - - # Should have original user message + assistant tool use + user tool result - assert len(messages_in_second_call) == 3 - assert messages_in_second_call[0]["role"] == "user" - assert messages_in_second_call[1]["role"] == "assistant" - assert messages_in_second_call[2]["role"] == "user" - - # Verify tool result format - tool_result = messages_in_second_call[2]["content"][0] - assert tool_result["type"] == "tool_result" - assert tool_result["tool_use_id"] == "tool_123" - assert "sunny and 75°F" in tool_result["content"] - - def test_anthropic_completion_module_is_imported(): """ Test that the completion module is properly imported when using Anthropic provider @@ -874,6 +795,125 @@ def 
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_with_available_functions():
    """
    Test that Anthropic provider correctly executes tools when available_functions is provided.

    The test verifies:
    1. The tool is invoked exactly once
    2. The tool result is returned directly (not wrapped in conversation)
    3. The result is valid JSON that round-trips to the arguments the tool received
    """
    import json

    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    # Every invocation is recorded so the assertions below can prove the tool
    # really ran and compare its arguments with the returned payload.
    received_calls: list[dict] = []

    def create_reasoning_plan(plan: str, steps: list, ready: bool) -> str:
        """Create a reasoning plan with steps."""
        payload = {"plan": plan, "steps": steps, "ready": ready}
        received_calls.append(payload)
        return json.dumps(payload)

    tools = [
        {
            "name": "create_reasoning_plan",
            "description": "Create a structured reasoning plan for completing a task",
            "input_schema": {
                "type": "object",
                "properties": {
                    "plan": {
                        "type": "string",
                        "description": "High-level plan description",
                    },
                    "steps": {
                        "type": "array",
                        "items": {"type": "object"},
                        "description": "List of steps to execute",
                    },
                    "ready": {
                        "type": "boolean",
                        "description": "Whether the plan is ready to execute",
                    },
                },
                "required": ["plan", "steps", "ready"],
            },
        }
    ]

    result = llm.call(
        messages=[
            {
                "role": "user",
                "content": "Create a simple plan to say hello. Use the create_reasoning_plan tool.",
            }
        ],
        tools=tools,
        available_functions={"create_reasoning_plan": create_reasoning_plan},
    )

    # The tool must have been executed exactly once by the provider
    assert len(received_calls) == 1, (
        f"Expected tool to be called once, got {len(received_calls)}"
    )

    # Verify result is the tool's own string output
    assert result is not None
    assert isinstance(result, str)

    # Parse the result to verify it's valid JSON with the expected keys
    parsed_result = json.loads(result)
    assert "plan" in parsed_result
    assert "steps" in parsed_result
    assert "ready" in parsed_result

    # The returned payload is exactly what the tool produced from its arguments
    assert parsed_result == received_calls[0]


@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_returns_tool_result_directly():
    """
    Test that when available_functions is provided, the tool result is returned directly
    without additional LLM conversation (matching OpenAI behavior for reasoning_handler).
    """
    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    call_count = 0

    def simple_calculator(operation: str, a: int, b: int) -> str:
        """Perform a simple calculation."""
        nonlocal call_count
        call_count += 1
        # Dispatch table instead of an if/elif chain; unknown operations keep
        # the same fallback string.
        operations = {
            "add": lambda: str(a + b),
            "multiply": lambda: str(a * b),
        }
        handler = operations.get(operation)
        return handler() if handler is not None else "Unknown operation"

    tools = [
        {
            "name": "simple_calculator",
            "description": "Perform simple math operations",
            "input_schema": {
                "type": "object",
                "properties": {
                    "operation": {
                        "type": "string",
                        "enum": ["add", "multiply"],
                        "description": "The operation to perform",
                    },
                    "a": {"type": "integer", "description": "First number"},
                    "b": {"type": "integer", "description": "Second number"},
                },
                "required": ["operation", "a", "b"],
            },
        }
    ]

    result = llm.call(
        messages=[
            {
                "role": "user",
                "content": "Calculate 5 + 3 using the simple_calculator tool with operation 'add'.",
            }
        ],
        tools=tools,
        available_functions={"simple_calculator": simple_calculator},
    )

    # Tool should have been called exactly once
    assert call_count == 1, f"Expected tool to be called once, got {call_count}"

    # Result should be the direct tool output
    assert result == "8", f"Expected '8' but got '{result}'"
@pytest.mark.vcr() def test_anthropic_agent_kickoff_structured_output_without_tools(): """