Lorenze/fix/anthropic available functions call (#4360)

* feat: enhance AnthropicCompletion to support available functions in tool execution

- Updated the `_prepare_completion_params` method to accept `available_functions` for better tool handling.
- Modified tool execution logic to directly return results from tools when `available_functions` is provided, aligning behavior with OpenAI's model.
- Added new test cases to validate the execution of tools with available functions, ensuring correct argument passing and result formatting.

This change improves the flexibility and usability of the Anthropic LLM integration, allowing for more complex interactions with tools.

* refactor: remove redundant event emission in AnthropicCompletion

* fix test

* dry up
This commit is contained in:
Lorenze Jay
2026-02-03 16:30:43 -08:00
committed by GitHub
parent d3f424fd8f
commit 3fec4669af
4 changed files with 393 additions and 111 deletions

View File

@@ -290,7 +290,7 @@ class AnthropicCompletion(BaseLLM):
# Prepare completion parameters
completion_params = self._prepare_completion_params(
formatted_messages, system_message, tools
formatted_messages, system_message, tools, available_functions
)
effective_response_model = response_model or self.response_format
@@ -361,7 +361,7 @@ class AnthropicCompletion(BaseLLM):
)
completion_params = self._prepare_completion_params(
formatted_messages, system_message, tools
formatted_messages, system_message, tools, available_functions
)
effective_response_model = response_model or self.response_format
@@ -396,6 +396,7 @@ class AnthropicCompletion(BaseLLM):
messages: list[LLMMessage],
system_message: str | None = None,
tools: list[dict[str, Any]] | None = None,
available_functions: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Prepare parameters for Anthropic messages API.
@@ -403,6 +404,8 @@ class AnthropicCompletion(BaseLLM):
messages: Formatted messages for Anthropic
system_message: Extracted system message
tools: Tool definitions
available_functions: Available functions for tool calling. When provided
with a single tool, tool_choice is automatically set to force tool use.
Returns:
Parameters dictionary for Anthropic API
@@ -428,7 +431,13 @@ class AnthropicCompletion(BaseLLM):
# Handle tools for Claude 3+
if tools and self.supports_tools:
params["tools"] = self._convert_tools_for_interference(tools)
converted_tools = self._convert_tools_for_interference(tools)
params["tools"] = converted_tools
if available_functions and len(converted_tools) == 1:
tool_name = converted_tools[0].get("name")
if tool_name and tool_name in available_functions:
params["tool_choice"] = {"type": "tool", "name": tool_name}
if self.thinking:
if isinstance(self.thinking, AnthropicThinkingConfig):
@@ -730,15 +739,11 @@ class AnthropicCompletion(BaseLLM):
)
return list(tool_uses)
# Handle tool use conversation flow internally
return self._handle_tool_use_conversation(
response,
tool_uses,
params,
available_functions,
from_task,
from_agent,
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
content = ""
thinking_blocks: list[ThinkingBlock] = []
@@ -939,14 +944,12 @@ class AnthropicCompletion(BaseLLM):
if not available_functions:
return list(tool_uses)
return self._handle_tool_use_conversation(
final_message,
tool_uses,
params,
available_functions,
from_task,
from_agent,
# Execute first tool and return result directly
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
full_response = self._apply_stop_words(full_response)
@@ -1005,6 +1008,41 @@ class AnthropicCompletion(BaseLLM):
return tool_results
def _execute_first_tool(
    self,
    tool_uses: list[ToolUseBlock | BetaToolUseBlock],
    available_functions: dict[str, Any],
    from_task: Any | None = None,
    from_agent: Any | None = None,
) -> Any | None:
    """Run the first tool in *tool_uses* and hand back its raw result.

    Used when callers supply ``available_functions``: rather than feeding the
    tool output back into a follow-up conversation, the result is returned
    directly — mirroring the OpenAI provider's behavior for use cases such as
    reasoning_handler.

    Args:
        tool_uses: Tool use blocks extracted from Claude's response.
        available_functions: Mapping of tool name to callable.
        from_task: Task that initiated the call, if any.
        from_agent: Agent that initiated the call, if any.

    Returns:
        The first tool's execution result, or None if execution failed.
    """
    first_use = tool_uses[0]
    # Delegate to the shared execution path, which also emits the
    # llm_call_completed event on our behalf.
    return self._handle_tool_execution(
        function_name=first_use.name,
        function_args=cast(dict[str, Any], first_use.input),
        available_functions=available_functions,
        from_task=from_task,
        from_agent=from_agent,
    )
# TODO: we drop this
def _handle_tool_use_conversation(
self,
initial_response: Message | BetaMessage,
@@ -1220,14 +1258,11 @@ class AnthropicCompletion(BaseLLM):
)
return list(tool_uses)
return await self._ahandle_tool_use_conversation(
response,
tool_uses,
params,
available_functions,
from_task,
from_agent,
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
content = ""
if response.content:
@@ -1408,14 +1443,11 @@ class AnthropicCompletion(BaseLLM):
if not available_functions:
return list(tool_uses)
return await self._ahandle_tool_use_conversation(
final_message,
tool_uses,
params,
available_functions,
from_task,
from_agent,
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
full_response = self._apply_stop_words(full_response)

View File

@@ -0,0 +1,102 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Calculate 5 +
3 using the simple_calculator tool with operation ''add''."}],"model":"claude-3-5-haiku-20241022","stream":false,"tool_choice":{"type":"tool","name":"simple_calculator"},"tools":[{"name":"simple_calculator","description":"Perform
simple math operations","input_schema":{"type":"object","properties":{"operation":{"type":"string","enum":["add","multiply"],"description":"The
operation to perform"},"a":{"type":"integer","description":"First number"},"b":{"type":"integer","description":"Second
number"}},"required":["operation","a","b"]}}]}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '608'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01Q2F83aAeqqTCxsd8WpZjK7","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01BW4XkHnhRVM5JZsvoaQKw5","name":"simple_calculator","input":{"operation":"add","a":5,"b":3}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":498,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":67,"service_tier":"standard"}}'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 23:26:35 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-02-03T23:26:34Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '1228'
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,108 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Create a simple
plan to say hello. Use the create_reasoning_plan tool."}],"model":"claude-3-5-haiku-20241022","stream":false,"tool_choice":{"type":"tool","name":"create_reasoning_plan"},"tools":[{"name":"create_reasoning_plan","description":"Create
a structured reasoning plan for completing a task","input_schema":{"type":"object","properties":{"plan":{"type":"string","description":"High-level
plan description"},"steps":{"type":"array","items":{"type":"object"},"description":"List
of steps to execute"},"ready":{"type":"boolean","description":"Whether the plan
is ready to execute"}},"required":["plan","steps","ready"]}}]}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '684'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01HLuGgGRFseMdhTYAhkKtfz","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01GQAUFHffGzMd3ufA6YRMZF","name":"create_reasoning_plan","input":{"plan":"Say
hello in a friendly and straightforward manner","steps":[{"description":"Take
a deep breath","action":"Pause and relax"},{"description":"Smile","action":"Prepare
a warm facial expression"},{"description":"Greet the person","action":"Say
''Hello!''"},{"description":"Wait for response","action":"Listen and be ready
to continue conversation"}],"ready":true}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":513,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":162,"service_tier":"standard"}}'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 23:26:38 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-02-03T23:26:35Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '2994'
status:
code: 200
message: OK
version: 1

View File

@@ -45,85 +45,6 @@ def test_anthropic_completion_is_used_when_claude_provider():
def test_anthropic_tool_use_conversation_flow():
    """
    Test that the Anthropic completion properly handles tool use conversation flow
    """
    from unittest.mock import Mock, patch

    from anthropic.types.tool_use_block import ToolUseBlock
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    completion = AnthropicCompletion(model="claude-3-5-sonnet-20241022")

    def mock_weather_tool(location: str) -> str:
        return f"The weather in {location} is sunny and 75°F"

    available_functions = {"get_weather": mock_weather_tool}

    with patch.object(completion.client.messages, "create") as mock_create:
        # First API response: Claude requests a tool call.
        tool_block = Mock(spec=ToolUseBlock)
        tool_block.type = "tool_use"
        tool_block.id = "tool_123"
        tool_block.name = "get_weather"
        tool_block.input = {"location": "San Francisco"}

        first_response = Mock()
        first_response.content = [tool_block]
        first_response.usage = Mock()
        first_response.usage.input_tokens = 100
        first_response.usage.output_tokens = 50

        # Second API response: final text after the tool result is fed back.
        # configure_mock pins .text to a real string instead of a child Mock.
        text_block = Mock()
        text_block.type = "text"
        text_block.configure_mock(
            text="Based on the weather data, it's a beautiful day in San Francisco with sunny skies and 75°F temperature."
        )

        final_response = Mock()
        final_response.content = [text_block]
        final_response.usage = Mock()
        final_response.usage.input_tokens = 150
        final_response.usage.output_tokens = 75

        mock_create.side_effect = [first_response, final_response]

        result = completion.call(
            messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
            available_functions=available_functions,
        )

        # The final text should surface in the returned string.
        assert "beautiful day in San Francisco" in result
        assert "sunny skies" in result
        assert "75°F" in result

        # Exactly two API calls: the initial request plus the follow-up.
        assert mock_create.call_count == 2

        # The follow-up call must carry: user msg, assistant tool use, tool result.
        followup_kwargs = mock_create.call_args_list[1][1]
        followup_messages = followup_kwargs["messages"]
        assert len(followup_messages) == 3
        assert followup_messages[0]["role"] == "user"
        assert followup_messages[1]["role"] == "assistant"
        assert followup_messages[2]["role"] == "user"

        # Tool result block must reference the originating tool_use id.
        tool_result = followup_messages[2]["content"][0]
        assert tool_result["type"] == "tool_result"
        assert tool_result["tool_use_id"] == "tool_123"
        assert "sunny and 75°F" in tool_result["content"]
def test_anthropic_completion_module_is_imported():
"""
Test that the completion module is properly imported when using Anthropic provider
@@ -874,6 +795,125 @@ def test_anthropic_function_calling():
# =============================================================================
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_with_available_functions():
    """
    Test that Anthropic provider correctly executes tools when available_functions is provided.

    This specifically tests the fix for double llm_call_completed emission - when
    available_functions is provided, _handle_tool_execution is called which already
    emits llm_call_completed, so the caller should not emit it again.

    The test verifies:
    1. The tool is called with correct arguments
    2. The tool result is returned directly (not wrapped in conversation)
    3. The result is valid JSON matching the tool output format
    """
    import json

    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    # The tool echoes its arguments back as JSON so we can verify them.
    def create_reasoning_plan(plan: str, steps: list, ready: bool) -> str:
        """Create a reasoning plan with steps."""
        return json.dumps({"plan": plan, "steps": steps, "ready": ready})

    plan_tool_schema = {
        "name": "create_reasoning_plan",
        "description": "Create a structured reasoning plan for completing a task",
        "input_schema": {
            "type": "object",
            "properties": {
                "plan": {
                    "type": "string",
                    "description": "High-level plan description",
                },
                "steps": {
                    "type": "array",
                    "items": {"type": "object"},
                    "description": "List of steps to execute",
                },
                "ready": {
                    "type": "boolean",
                    "description": "Whether the plan is ready to execute",
                },
            },
            "required": ["plan", "steps", "ready"],
        },
    }

    result = llm.call(
        messages=[
            {
                "role": "user",
                "content": "Create a simple plan to say hello. Use the create_reasoning_plan tool.",
            }
        ],
        tools=[plan_tool_schema],
        available_functions={"create_reasoning_plan": create_reasoning_plan},
    )

    # The tool's JSON output must come back directly, not an LLM paraphrase.
    assert result is not None
    assert isinstance(result, str)

    parsed = json.loads(result)
    assert "plan" in parsed
    assert "steps" in parsed
    assert "ready" in parsed
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_returns_tool_result_directly():
    """
    Test that when available_functions is provided, the tool result is returned directly
    without additional LLM conversation (matching OpenAI behavior for reasoning_handler).
    """
    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    # Counter lets us assert the tool ran exactly once.
    call_count = 0

    def simple_calculator(operation: str, a: int, b: int) -> str:
        """Perform a simple calculation."""
        nonlocal call_count
        call_count += 1
        if operation == "add":
            return str(a + b)
        if operation == "multiply":
            return str(a * b)
        return "Unknown operation"

    calculator_schema = {
        "name": "simple_calculator",
        "description": "Perform simple math operations",
        "input_schema": {
            "type": "object",
            "properties": {
                "operation": {
                    "type": "string",
                    "enum": ["add", "multiply"],
                    "description": "The operation to perform",
                },
                "a": {"type": "integer", "description": "First number"},
                "b": {"type": "integer", "description": "Second number"},
            },
            "required": ["operation", "a", "b"],
        },
    }

    result = llm.call(
        messages=[
            {
                "role": "user",
                "content": "Calculate 5 + 3 using the simple_calculator tool with operation 'add'.",
            }
        ],
        tools=[calculator_schema],
        available_functions={"simple_calculator": simple_calculator},
    )

    # Tool should have been called exactly once.
    assert call_count == 1, f"Expected tool to be called once, got {call_count}"
    # Result should be the direct tool output, untouched by the model.
    assert result == "8", f"Expected '8' but got '{result}'"
@pytest.mark.vcr()
def test_anthropic_agent_kickoff_structured_output_without_tools():
"""