Lorenze/fix/anthropic available functions call (#4360)

* feat: enhance AnthropicCompletion to support available functions in tool execution

- Updated the `_prepare_completion_params` method to accept `available_functions` for better tool handling.
- Modified tool execution logic to directly return results from tools when `available_functions` is provided, aligning behavior with OpenAI's model.
- Added new test cases to validate the execution of tools with available functions, ensuring correct argument passing and result formatting.

This change improves the flexibility and usability of the Anthropic LLM integration, allowing for more complex interactions with tools.

* refactor: remove redundant event emission in AnthropicCompletion

* fix test

* dry up
This commit is contained in:
Lorenze Jay
2026-02-03 16:30:43 -08:00
committed by GitHub
parent d3f424fd8f
commit 3fec4669af
4 changed files with 393 additions and 111 deletions

View File

@@ -290,7 +290,7 @@ class AnthropicCompletion(BaseLLM):
# Prepare completion parameters
completion_params = self._prepare_completion_params(
formatted_messages, system_message, tools
formatted_messages, system_message, tools, available_functions
)
effective_response_model = response_model or self.response_format
@@ -361,7 +361,7 @@ class AnthropicCompletion(BaseLLM):
)
completion_params = self._prepare_completion_params(
formatted_messages, system_message, tools
formatted_messages, system_message, tools, available_functions
)
effective_response_model = response_model or self.response_format
@@ -396,6 +396,7 @@ class AnthropicCompletion(BaseLLM):
messages: list[LLMMessage],
system_message: str | None = None,
tools: list[dict[str, Any]] | None = None,
available_functions: dict[str, Any] | None = None,
) -> dict[str, Any]:
"""Prepare parameters for Anthropic messages API.
@@ -403,6 +404,8 @@ class AnthropicCompletion(BaseLLM):
messages: Formatted messages for Anthropic
system_message: Extracted system message
tools: Tool definitions
available_functions: Available functions for tool calling. When provided
with a single tool, tool_choice is automatically set to force tool use.
Returns:
Parameters dictionary for Anthropic API
@@ -428,7 +431,13 @@ class AnthropicCompletion(BaseLLM):
# Handle tools for Claude 3+
if tools and self.supports_tools:
params["tools"] = self._convert_tools_for_interference(tools)
converted_tools = self._convert_tools_for_interference(tools)
params["tools"] = converted_tools
if available_functions and len(converted_tools) == 1:
tool_name = converted_tools[0].get("name")
if tool_name and tool_name in available_functions:
params["tool_choice"] = {"type": "tool", "name": tool_name}
if self.thinking:
if isinstance(self.thinking, AnthropicThinkingConfig):
@@ -730,15 +739,11 @@ class AnthropicCompletion(BaseLLM):
)
return list(tool_uses)
# Handle tool use conversation flow internally
return self._handle_tool_use_conversation(
response,
tool_uses,
params,
available_functions,
from_task,
from_agent,
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
content = ""
thinking_blocks: list[ThinkingBlock] = []
@@ -939,14 +944,12 @@ class AnthropicCompletion(BaseLLM):
if not available_functions:
return list(tool_uses)
return self._handle_tool_use_conversation(
final_message,
tool_uses,
params,
available_functions,
from_task,
from_agent,
# Execute first tool and return result directly
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
full_response = self._apply_stop_words(full_response)
@@ -1005,6 +1008,41 @@ class AnthropicCompletion(BaseLLM):
return tool_results
def _execute_first_tool(
    self,
    tool_uses: list[ToolUseBlock | BetaToolUseBlock],
    available_functions: dict[str, Any],
    from_task: Any | None = None,
    from_agent: Any | None = None,
) -> Any | None:
    """Run the first tool in *tool_uses* and hand back its raw result.

    Used when callers supply ``available_functions``: rather than feeding the
    tool output back into a follow-up conversation, the result is returned
    directly — mirroring the OpenAI provider's behavior for use cases such as
    reasoning_handler.

    Args:
        tool_uses: Tool use blocks extracted from Claude's response.
        available_functions: Mapping of tool name to callable.
        from_task: Task that initiated the call, if any.
        from_agent: Agent that initiated the call, if any.

    Returns:
        The first tool's execution result, or None if execution failed.
    """
    first_use = tool_uses[0]
    # Delegate to the shared execution path, which also emits the
    # llm_call_completed event on our behalf.
    return self._handle_tool_execution(
        function_name=first_use.name,
        function_args=cast(dict[str, Any], first_use.input),
        available_functions=available_functions,
        from_task=from_task,
        from_agent=from_agent,
    )
# TODO: we drop this
def _handle_tool_use_conversation(
self,
initial_response: Message | BetaMessage,
@@ -1220,14 +1258,11 @@ class AnthropicCompletion(BaseLLM):
)
return list(tool_uses)
return await self._ahandle_tool_use_conversation(
response,
tool_uses,
params,
available_functions,
from_task,
from_agent,
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
content = ""
if response.content:
@@ -1408,14 +1443,11 @@ class AnthropicCompletion(BaseLLM):
if not available_functions:
return list(tool_uses)
return await self._ahandle_tool_use_conversation(
final_message,
tool_uses,
params,
available_functions,
from_task,
from_agent,
result = self._execute_first_tool(
tool_uses, available_functions, from_task, from_agent
)
if result is not None:
return result
full_response = self._apply_stop_words(full_response)

View File

@@ -0,0 +1,102 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Calculate 5 +
3 using the simple_calculator tool with operation ''add''."}],"model":"claude-3-5-haiku-20241022","stream":false,"tool_choice":{"type":"tool","name":"simple_calculator"},"tools":[{"name":"simple_calculator","description":"Perform
simple math operations","input_schema":{"type":"object","properties":{"operation":{"type":"string","enum":["add","multiply"],"description":"The
operation to perform"},"a":{"type":"integer","description":"First number"},"b":{"type":"integer","description":"Second
number"}},"required":["operation","a","b"]}}]}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '608'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01Q2F83aAeqqTCxsd8WpZjK7","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01BW4XkHnhRVM5JZsvoaQKw5","name":"simple_calculator","input":{"operation":"add","a":5,"b":3}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":498,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":67,"service_tier":"standard"}}'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 23:26:35 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-02-03T23:26:34Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '1228'
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,108 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"Create a simple
plan to say hello. Use the create_reasoning_plan tool."}],"model":"claude-3-5-haiku-20241022","stream":false,"tool_choice":{"type":"tool","name":"create_reasoning_plan"},"tools":[{"name":"create_reasoning_plan","description":"Create
a structured reasoning plan for completing a task","input_schema":{"type":"object","properties":{"plan":{"type":"string","description":"High-level
plan description"},"steps":{"type":"array","items":{"type":"object"},"description":"List
of steps to execute"},"ready":{"type":"boolean","description":"Whether the plan
is ready to execute"}},"required":["plan","steps","ready"]}}]}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '684'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.73.0
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: '{"model":"claude-3-5-haiku-20241022","id":"msg_01HLuGgGRFseMdhTYAhkKtfz","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01GQAUFHffGzMd3ufA6YRMZF","name":"create_reasoning_plan","input":{"plan":"Say
hello in a friendly and straightforward manner","steps":[{"description":"Take
a deep breath","action":"Pause and relax"},{"description":"Smile","action":"Prepare
a warm facial expression"},{"description":"Greet the person","action":"Say
''Hello!''"},{"description":"Wait for response","action":"Listen and be ready
to continue conversation"}],"ready":true}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":513,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":162,"service_tier":"standard"}}'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Security-Policy:
- CSP-FILTERED
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 23:26:38 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-02-03T23:26:35Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '2994'
status:
code: 200
message: OK
version: 1

View File

@@ -45,85 +45,6 @@ def test_anthropic_completion_is_used_when_claude_provider():
def test_anthropic_tool_use_conversation_flow():
    """
    Test that the Anthropic completion properly handles tool use conversation flow
    """
    from unittest.mock import Mock, patch

    from anthropic.types.tool_use_block import ToolUseBlock
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    completion = AnthropicCompletion(model="claude-3-5-sonnet-20241022")

    def mock_weather_tool(location: str) -> str:
        return f"The weather in {location} is sunny and 75°F"

    available_functions = {"get_weather": mock_weather_tool}

    with patch.object(completion.client.messages, "create") as mock_create:
        # First API response: Claude requests a tool call.
        tool_block = Mock(spec=ToolUseBlock)
        tool_block.type = "tool_use"
        tool_block.id = "tool_123"
        tool_block.name = "get_weather"
        tool_block.input = {"location": "San Francisco"}

        first_response = Mock()
        first_response.content = [tool_block]
        first_response.usage = Mock()
        first_response.usage.input_tokens = 100
        first_response.usage.output_tokens = 50

        # Second API response: final text after the tool result is fed back.
        # configure_mock pins .text to a real string instead of a child Mock.
        text_block = Mock()
        text_block.type = "text"
        text_block.configure_mock(
            text="Based on the weather data, it's a beautiful day in San Francisco with sunny skies and 75°F temperature."
        )

        final_response = Mock()
        final_response.content = [text_block]
        final_response.usage = Mock()
        final_response.usage.input_tokens = 150
        final_response.usage.output_tokens = 75

        mock_create.side_effect = [first_response, final_response]

        result = completion.call(
            messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
            available_functions=available_functions,
        )

        # The final text should surface in the returned string.
        assert "beautiful day in San Francisco" in result
        assert "sunny skies" in result
        assert "75°F" in result

        # Exactly two API calls: the initial request plus the follow-up.
        assert mock_create.call_count == 2

        # The follow-up call must carry: user msg, assistant tool use, tool result.
        followup_kwargs = mock_create.call_args_list[1][1]
        followup_messages = followup_kwargs["messages"]
        assert len(followup_messages) == 3
        assert followup_messages[0]["role"] == "user"
        assert followup_messages[1]["role"] == "assistant"
        assert followup_messages[2]["role"] == "user"

        # Tool result block must reference the originating tool_use id.
        tool_result = followup_messages[2]["content"][0]
        assert tool_result["type"] == "tool_result"
        assert tool_result["tool_use_id"] == "tool_123"
        assert "sunny and 75°F" in tool_result["content"]
def test_anthropic_completion_module_is_imported():
"""
Test that the completion module is properly imported when using Anthropic provider
@@ -874,6 +795,125 @@ def test_anthropic_function_calling():
# =============================================================================
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_with_available_functions():
    """
    Test that Anthropic provider correctly executes tools when available_functions is provided.

    This specifically tests the fix for double llm_call_completed emission - when
    available_functions is provided, _handle_tool_execution is called which already
    emits llm_call_completed, so the caller should not emit it again.

    The test verifies:
    1. The tool is called with correct arguments
    2. The tool result is returned directly (not wrapped in conversation)
    3. The result is valid JSON matching the tool output format
    """
    import json

    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    # The tool echoes its arguments back as JSON so we can verify them.
    def create_reasoning_plan(plan: str, steps: list, ready: bool) -> str:
        """Create a reasoning plan with steps."""
        return json.dumps({"plan": plan, "steps": steps, "ready": ready})

    plan_tool_schema = {
        "name": "create_reasoning_plan",
        "description": "Create a structured reasoning plan for completing a task",
        "input_schema": {
            "type": "object",
            "properties": {
                "plan": {
                    "type": "string",
                    "description": "High-level plan description",
                },
                "steps": {
                    "type": "array",
                    "items": {"type": "object"},
                    "description": "List of steps to execute",
                },
                "ready": {
                    "type": "boolean",
                    "description": "Whether the plan is ready to execute",
                },
            },
            "required": ["plan", "steps", "ready"],
        },
    }

    result = llm.call(
        messages=[
            {
                "role": "user",
                "content": "Create a simple plan to say hello. Use the create_reasoning_plan tool.",
            }
        ],
        tools=[plan_tool_schema],
        available_functions={"create_reasoning_plan": create_reasoning_plan},
    )

    # The tool's JSON output must come back directly, not an LLM paraphrase.
    assert result is not None
    assert isinstance(result, str)

    parsed = json.loads(result)
    assert "plan" in parsed
    assert "steps" in parsed
    assert "ready" in parsed
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_returns_tool_result_directly():
    """
    Test that when available_functions is provided, the tool result is returned directly
    without additional LLM conversation (matching OpenAI behavior for reasoning_handler).
    """
    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    # Counter lets us assert the tool ran exactly once.
    call_count = 0

    def simple_calculator(operation: str, a: int, b: int) -> str:
        """Perform a simple calculation."""
        nonlocal call_count
        call_count += 1
        if operation == "add":
            return str(a + b)
        if operation == "multiply":
            return str(a * b)
        return "Unknown operation"

    calculator_schema = {
        "name": "simple_calculator",
        "description": "Perform simple math operations",
        "input_schema": {
            "type": "object",
            "properties": {
                "operation": {
                    "type": "string",
                    "enum": ["add", "multiply"],
                    "description": "The operation to perform",
                },
                "a": {"type": "integer", "description": "First number"},
                "b": {"type": "integer", "description": "Second number"},
            },
            "required": ["operation", "a", "b"],
        },
    }

    result = llm.call(
        messages=[
            {
                "role": "user",
                "content": "Calculate 5 + 3 using the simple_calculator tool with operation 'add'.",
            }
        ],
        tools=[calculator_schema],
        available_functions={"simple_calculator": simple_calculator},
    )

    # Tool should have been called exactly once.
    assert call_count == 1, f"Expected tool to be called once, got {call_count}"
    # Result should be the direct tool output, untouched by the model.
    assert result == "8", f"Expected '8' but got '{result}'"
@pytest.mark.vcr()
def test_anthropic_agent_kickoff_structured_output_without_tools():
"""