Lorenze/fix/anthropic available functions call (#4360)

* feat: enhance AnthropicCompletion to support available functions in tool execution - Updated the `_prepare_completion_params` method to accept `available_functions` for better tool handling. - Modified tool execution logic to directly return results from tools when `available_functions` is provided, aligning behavior with OpenAI's model. - Added new test cases to validate the execution of tools with available functions, ensuring correct argument passing and result formatting. This change improves the flexibility and usability of the Anthropic LLM integration, allowing for more complex interactions with tools. * refactor: remove redundant event emission in AnthropicCompletion * fix test * dry up
2026-05-03 08:12:39 +00:00 · 2026-02-03 16:30:43 -08:00
parent d3f424fd8f
commit 3fec4669af
4 changed files with 393 additions and 111 deletions
--- a/lib/crewai/tests/llms/anthropic/test_anthropic.py
+++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py
@@ -45,85 +45,6 @@ def test_anthropic_completion_is_used_when_claude_provider():



-def test_anthropic_tool_use_conversation_flow():
-    """
-    Test that the Anthropic completion properly handles tool use conversation flow
-    """
-    from unittest.mock import Mock, patch
-    from crewai.llms.providers.anthropic.completion import AnthropicCompletion
-    from anthropic.types.tool_use_block import ToolUseBlock
-
-    # Create AnthropicCompletion instance
-    completion = AnthropicCompletion(model="claude-3-5-sonnet-20241022")
-
-    # Mock tool function
-    def mock_weather_tool(location: str) -> str:
-        return f"The weather in {location} is sunny and 75°F"
-
-    available_functions = {"get_weather": mock_weather_tool}
-
-    # Mock the Anthropic client responses
-    with patch.object(completion.client.messages, 'create') as mock_create:
-        # Mock initial response with tool use - need to properly mock ToolUseBlock
-        mock_tool_use = Mock(spec=ToolUseBlock)
-        mock_tool_use.type = "tool_use"
-        mock_tool_use.id = "tool_123"
-        mock_tool_use.name = "get_weather"
-        mock_tool_use.input = {"location": "San Francisco"}
-
-        mock_initial_response = Mock()
-        mock_initial_response.content = [mock_tool_use]
-        mock_initial_response.usage = Mock()
-        mock_initial_response.usage.input_tokens = 100
-        mock_initial_response.usage.output_tokens = 50
-
-        # Mock final response after tool result - properly mock text content
-        mock_text_block = Mock()
-        mock_text_block.type = "text"
-        # Set the text attribute as a string, not another Mock
-        mock_text_block.configure_mock(text="Based on the weather data, it's a beautiful day in San Francisco with sunny skies and 75°F temperature.")
-
-        mock_final_response = Mock()
-        mock_final_response.content = [mock_text_block]
-        mock_final_response.usage = Mock()
-        mock_final_response.usage.input_tokens = 150
-        mock_final_response.usage.output_tokens = 75
-
-        # Configure mock to return different responses on successive calls
-        mock_create.side_effect = [mock_initial_response, mock_final_response]
-
-        # Test the call
-        messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}]
-        result = completion.call(
-            messages=messages,
-            available_functions=available_functions
-        )
-
-        # Verify the result contains the final response
-        assert "beautiful day in San Francisco" in result
-        assert "sunny skies" in result
-        assert "75°F" in result
-
-        # Verify that two API calls were made (initial + follow-up)
-        assert mock_create.call_count == 2
-
-        # Verify the second call includes tool results
-        second_call_args = mock_create.call_args_list[1][1]  # kwargs of second call
-        messages_in_second_call = second_call_args["messages"]
-
-        # Should have original user message + assistant tool use + user tool result
-        assert len(messages_in_second_call) == 3
-        assert messages_in_second_call[0]["role"] == "user"
-        assert messages_in_second_call[1]["role"] == "assistant"
-        assert messages_in_second_call[2]["role"] == "user"
-
-        # Verify tool result format
-        tool_result = messages_in_second_call[2]["content"][0]
-        assert tool_result["type"] == "tool_result"
-        assert tool_result["tool_use_id"] == "tool_123"
-        assert "sunny and 75°F" in tool_result["content"]
-
-
 def test_anthropic_completion_module_is_imported():
    """
    Test that the completion module is properly imported when using Anthropic provider
@@ -874,6 +795,125 @@ def test_anthropic_function_calling():
 # =============================================================================


+@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
+def test_anthropic_tool_execution_with_available_functions():
+    """
+    Test that Anthropic provider correctly executes tools when available_functions is provided.
+
+    This specifically tests the fix for double llm_call_completed emission - when
+    available_functions is provided, _handle_tool_execution is called which already
+    emits llm_call_completed, so the caller should not emit it again.
+
+    The test verifies:
+    1. The tool is called with correct arguments
+    2. The tool result is returned directly (not wrapped in conversation)
+    3. The result is valid JSON matching the tool output format
+    """
+    import json
+
+    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")
+
+    # Simple tool that returns a formatted string
+    def create_reasoning_plan(plan: str, steps: list, ready: bool) -> str:
+        """Create a reasoning plan with steps."""
+        return json.dumps({"plan": plan, "steps": steps, "ready": ready})
+
+    tools = [
+        {
+            "name": "create_reasoning_plan",
+            "description": "Create a structured reasoning plan for completing a task",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "plan": {
+                        "type": "string",
+                        "description": "High-level plan description"
+                    },
+                    "steps": {
+                        "type": "array",
+                        "items": {"type": "object"},
+                        "description": "List of steps to execute"
+                    },
+                    "ready": {
+                        "type": "boolean",
+                        "description": "Whether the plan is ready to execute"
+                    }
+                },
+                "required": ["plan", "steps", "ready"]
+            }
+        }
+    ]
+
+    result = llm.call(
+        messages=[{"role": "user", "content": "Create a simple plan to say hello. Use the create_reasoning_plan tool."}],
+        tools=tools,
+        available_functions={"create_reasoning_plan": create_reasoning_plan}
+    )
+
+    # Verify result is valid JSON from the tool
+    assert result is not None
+    assert isinstance(result, str)
+
+    # Parse the result to verify it's valid JSON
+    parsed_result = json.loads(result)
+    assert "plan" in parsed_result
+    assert "steps" in parsed_result
+    assert "ready" in parsed_result
+
+
+@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
+def test_anthropic_tool_execution_returns_tool_result_directly():
+    """
+    Test that when available_functions is provided, the tool result is returned directly
+    without additional LLM conversation (matching OpenAI behavior for reasoning_handler).
+    """
+    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")
+
+    call_count = 0
+
+    def simple_calculator(operation: str, a: int, b: int) -> str:
+        """Perform a simple calculation."""
+        nonlocal call_count
+        call_count += 1
+        if operation == "add":
+            return str(a + b)
+        elif operation == "multiply":
+            return str(a * b)
+        return "Unknown operation"
+
+    tools = [
+        {
+            "name": "simple_calculator",
+            "description": "Perform simple math operations",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "operation": {
+                        "type": "string",
+                        "enum": ["add", "multiply"],
+                        "description": "The operation to perform"
+                    },
+                    "a": {"type": "integer", "description": "First number"},
+                    "b": {"type": "integer", "description": "Second number"}
+                },
+                "required": ["operation", "a", "b"]
+            }
+        }
+    ]
+
+    result = llm.call(
+        messages=[{"role": "user", "content": "Calculate 5 + 3 using the simple_calculator tool with operation 'add'."}],
+        tools=tools,
+        available_functions={"simple_calculator": simple_calculator}
+    )
+
+    # Tool should have been called exactly once
+    assert call_count == 1, f"Expected tool to be called once, got {call_count}"
+
+    # Result should be the direct tool output
+    assert result == "8", f"Expected '8' but got '{result}'"
+
+
@pytest.mark.vcr()
 def test_anthropic_agent_kickoff_structured_output_without_tools():
    """