mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-01 07:13:00 +00:00
1466 lines
50 KiB
Python
1466 lines
50 KiB
Python
import os
|
|
import sys
|
|
import types
|
|
from unittest.mock import patch, MagicMock
|
|
import pytest
|
|
|
|
from crewai.llm import LLM
|
|
from crewai.crew import Crew
|
|
from crewai.agent import Agent
|
|
from crewai.task import Task
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def mock_anthropic_api_key():
    """Make sure ANTHROPIC_API_KEY exists while each test in this module runs.

    A key that is already present in the environment is left untouched.
    Otherwise a placeholder value is patched in for the duration of the test.
    """
    if "ANTHROPIC_API_KEY" in os.environ:
        # A key is already configured; run the test against it as-is.
        yield
        return

    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
        yield
|
|
|
|
|
|
def test_anthropic_completion_is_used_when_anthropic_provider():
    """An ``anthropic/`` model prefix must resolve to the AnthropicCompletion class."""
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    resolved_class_name = llm.__class__.__name__
    assert resolved_class_name == "AnthropicCompletion"

    # The provider prefix is split off: it ends up in ``provider`` and the
    # remainder becomes the model name.
    assert llm.provider == "anthropic"
    assert llm.model == "claude-3-5-sonnet-20241022"
|
|
|
|
|
|
def test_anthropic_completion_is_used_when_claude_provider():
    """A ``claude/`` model prefix must also produce an AnthropicCompletion instance."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    llm = LLM(model="claude/claude-3-5-sonnet-20241022")

    assert isinstance(llm, AnthropicCompletion)
    # The ``claude`` prefix is reported back as the ``anthropic`` provider.
    assert llm.provider == "anthropic"
    assert llm.model == "claude-3-5-sonnet-20241022"
|
|
|
|
|
|
|
|
|
|
def test_anthropic_completion_module_is_imported():
    """Creating an Anthropic LLM must load the provider's completion module."""
    module_name = "crewai.llms.providers.anthropic.completion"

    # Evict any cached copy so that the presence check below can only be
    # satisfied by an import performed while constructing the LLM.
    sys.modules.pop(module_name, None)

    LLM(model="anthropic/claude-3-5-sonnet-20241022")

    assert module_name in sys.modules
    completion_mod = sys.modules[module_name]
    assert isinstance(completion_mod, types.ModuleType)

    # The module must expose the completion class itself.
    assert hasattr(completion_mod, 'AnthropicCompletion')
|
|
|
|
|
|
def test_native_anthropic_raises_error_when_initialization_fails():
    """A native provider whose constructor fails must surface as ImportError.

    The provider lookup is patched to hand back a class that raises on
    construction; the LLM factory is expected to raise ImportError carrying
    both its own prefix and the underlying failure text.
    """

    # Stand-in provider class: constructing it always fails.
    class FailingCompletion:
        def __init__(self, *args, **kwargs):
            raise Exception("Native Anthropic SDK failed")

    with patch('crewai.llm.LLM._get_native_provider', return_value=FailingCompletion):
        with pytest.raises(ImportError) as excinfo:
            LLM(model="anthropic/claude-3-5-sonnet-20241022")

    error_text = str(excinfo.value)
    assert "Error importing native provider" in error_text
    assert "Native Anthropic SDK failed" in error_text
|
|
|
|
|
|
def test_anthropic_completion_initialization_parameters():
    """Sampling options passed to ``LLM`` must be readable back from the instance."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    llm = LLM(
        model="anthropic/claude-3-5-sonnet-20241022",
        temperature=0.7,
        max_tokens=2000,
        top_p=0.9,
        api_key="test-key",
    )

    assert isinstance(llm, AnthropicCompletion)
    assert llm.model == "claude-3-5-sonnet-20241022"

    # Checked in the same order as they were supplied above.
    expected_sampling = (("temperature", 0.7), ("max_tokens", 2000), ("top_p", 0.9))
    for attribute, expected in expected_sampling:
        assert getattr(llm, attribute) == expected
|
|
|
|
|
|
def test_anthropic_specific_parameters():
    """Anthropic-specific options must land on the LLM and on its client.

    ``stop_sequences`` and ``stream`` are read back from the LLM object,
    while ``max_retries`` and ``timeout`` are read back from ``llm._client``.
    """
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    llm = LLM(
        model="anthropic/claude-3-5-sonnet-20241022",
        stop_sequences=["Human:", "Assistant:"],
        stream=True,
        max_retries=5,
        timeout=60,
    )

    assert isinstance(llm, AnthropicCompletion)
    assert llm.stop_sequences == ["Human:", "Assistant:"]
    # Identity check: the flag must be the boolean True, not merely truthy
    # (``== True`` would also accept 1).
    assert llm.stream is True
    assert llm._client.max_retries == 5
    assert llm._client.timeout == 60
|
|
|
|
|
|
def test_anthropic_completion_call():
    """``call`` on an Anthropic LLM can be patched and invoked with a plain prompt.

    NOTE(review): ``call`` itself is replaced by a mock here, so this only
    checks the patching and the invocation, not the provider's real logic.
    """
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    prompt = "Hello, how are you?"
    canned_reply = "Hello! I'm Claude, ready to help."

    with patch.object(llm, 'call', return_value=canned_reply) as mock_call:
        result = llm.call(prompt)

        assert result == canned_reply
        mock_call.assert_called_once_with(prompt)
|
|
|
|
|
|
def test_anthropic_completion_called_during_crew_execution():
    """Running a crew must invoke ``call`` on the LLM instance given to the agent."""
    anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    # Patch ``call`` on this exact instance; the agent below receives the same
    # object, so its requests are answered by the mock.
    with patch.object(anthropic_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call:
        researcher = Agent(
            role="Research Assistant",
            goal="Find population info",
            backstory="You research populations.",
            llm=anthropic_llm,
        )
        population_task = Task(
            description="Find Tokyo population",
            expected_output="Population number",
            agent=researcher,
        )

        result = Crew(agents=[researcher], tasks=[population_task]).kickoff()

        assert mock_call.called
        # The mocked reply has to appear in the crew's final output.
        assert "14 million" in str(result)
|
|
|
|
|
|
def test_anthropic_completion_call_arguments():
    """The payload passed to ``call`` during a crew run must mention the task description."""
    anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    with patch.object(anthropic_llm, 'call', return_value="Task completed successfully.") as mock_call:
        greeter = Agent(
            role="Test Agent",
            goal="Complete a simple task",
            backstory="You are a test agent.",
            llm=anthropic_llm,  # same instance that carries the mock
        )
        hello_task = Task(
            description="Say hello world",
            expected_output="Hello world",
            agent=greeter,
        )

        Crew(agents=[greeter], tasks=[hello_task]).kickoff()

        assert mock_call.called

        last_call = mock_call.call_args
        assert last_call is not None

        # The messages payload is the first positional argument of the most
        # recent call; it may be a raw string or a list of messages.
        messages = last_call.args[0]
        assert isinstance(messages, (str, list))

        # A string is searched as-is; a list is searched via its repr.
        searchable = messages if isinstance(messages, str) else str(messages)
        assert "hello world" in searchable.lower()
|
|
|
|
|
|
def test_multiple_anthropic_calls_in_crew():
    """A crew with two tasks must call the LLM at least twice, each time with a payload."""
    anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    with patch.object(anthropic_llm, 'call', return_value="Task completed.") as mock_call:
        worker = Agent(
            role="Multi-task Agent",
            goal="Complete multiple tasks",
            backstory="You can handle multiple tasks.",
            llm=anthropic_llm,  # same instance that carries the mock
        )
        first_task = Task(
            description="First task",
            expected_output="First result",
            agent=worker,
        )
        second_task = Task(
            description="Second task",
            expected_output="Second result",
            agent=worker,
        )

        Crew(agents=[worker], tasks=[first_task, second_task]).kickoff()

        # At least one LLM call per task.
        assert mock_call.call_count >= 2

        # Every recorded call must carry a non-None first positional argument.
        for recorded in mock_call.call_args_list:
            positional = recorded.args
            assert len(positional) > 0
            assert positional[0] is not None
|
|
|
|
|
|
def test_anthropic_completion_with_tools():
    """An agent equipped with a tool must still route its work through ``call``.

    If the recorded call carries a ``tools`` keyword, that value must be a
    non-empty collection; when the keyword is absent nothing further is checked.
    """
    from crewai.tools import tool

    @tool
    def sample_tool(query: str) -> str:
        """A sample tool for testing"""
        return f"Tool result for: {query}"

    anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    with patch.object(anthropic_llm, 'call', return_value="Task completed with tools.") as mock_call:
        tool_user = Agent(
            role="Tool User",
            goal="Use tools to complete tasks",
            backstory="You can use tools.",
            llm=anthropic_llm,  # same instance that carries the mock
            tools=[sample_tool],
        )
        tool_task = Task(
            description="Use the sample tool",
            expected_output="Tool usage result",
            agent=tool_user,
        )

        Crew(agents=[tool_user], tasks=[tool_task]).kickoff()

        assert mock_call.called

        # Keyword arguments of the most recent call.
        call_kwargs = mock_call.call_args.kwargs

        if 'tools' in call_kwargs:
            passed_tools = call_kwargs['tools']
            assert passed_tools is not None
            assert len(passed_tools) > 0
|
|
|
|
|
|
def test_anthropic_raises_error_when_model_not_supported():
    """Calling a model the API rejects must raise rather than return.

    The Anthropic client class is patched so that ``messages.create`` raises
    the SDK's ``NotFoundError``. The test accepts any exception type from
    ``llm.call``; it does not pin a specific class.
    """
    from anthropic import NotFoundError

    with patch('crewai.llms.providers.anthropic.completion.Anthropic') as mock_anthropic_class:
        mock_client = MagicMock()
        mock_anthropic_class.return_value = mock_client

        # Simulate the API's answer for an unknown model name.
        mock_client.messages.create.side_effect = NotFoundError(
            message="The model `model-doesnt-exist` does not exist",
            response=MagicMock(),
            body={},
        )

        llm = LLM(model="anthropic/model-doesnt-exist")

        with pytest.raises(Exception):
            llm.call("Hello")
|
|
|
|
|
|
def test_anthropic_client_params_setup():
    """Custom ``client_params`` must be merged with the individually supplied client options."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    custom_client_params = {"default_headers": {"X-Custom-Header": "test-value"}}

    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
        llm = LLM(
            model="anthropic/claude-3-5-sonnet-20241022",
            api_key="test-key",
            base_url="https://custom-api.com",
            timeout=45,
            max_retries=5,
            client_params=custom_client_params,
        )

        assert isinstance(llm, AnthropicCompletion)
        # The extra parameters are stored on the instance unchanged ...
        assert llm.client_params == custom_client_params

        # ... and show up next to the standard ones in the merged view.
        merged_params = llm._get_client_params()
        expected_entries = (
            ("api_key", "test-key"),
            ("base_url", "https://custom-api.com"),
            ("timeout", 45),
            ("max_retries", 5),
            ("default_headers", {"X-Custom-Header": "test-value"}),
        )
        for key, expected in expected_entries:
            assert merged_params[key] == expected
|
|
|
|
|
|
def test_anthropic_client_params_override_defaults():
    """Values in ``client_params`` must win over the same options passed individually."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    override_client_params = {
        "timeout": 120,       # conflicts with timeout=30 below
        "max_retries": 10,    # conflicts with max_retries=3 below
        "default_headers": {"X-Override": "true"},
    }

    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
        llm = LLM(
            model="anthropic/claude-3-5-sonnet-20241022",
            api_key="test-key",
            timeout=30,
            max_retries=3,
            client_params=override_client_params,
        )

        # Guard against a different LLM implementation having been selected.
        assert isinstance(llm, AnthropicCompletion)

        merged_params = llm._get_client_params()

        # The client_params values must be the ones that survive the merge.
        assert merged_params["timeout"] == 120
        assert merged_params["max_retries"] == 10
        assert merged_params["default_headers"] == {"X-Override": "true"}
|
|
|
|
|
|
def test_anthropic_client_params_none():
    """With ``client_params=None`` the merged parameters contain only the standard options."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
        llm = LLM(
            model="anthropic/claude-3-5-sonnet-20241022",
            api_key="test-key",
            base_url="https://api.anthropic.com",
            timeout=60,
            max_retries=2,
            client_params=None,
        )

        assert isinstance(llm, AnthropicCompletion)
        assert llm.client_params is None

        merged_params = llm._get_client_params()

        # Exactly these four keys, nothing extra.
        expected_keys = {"api_key", "base_url", "timeout", "max_retries"}
        assert set(merged_params.keys()) == expected_keys

        # Each value is the one passed to the constructor above.
        assert merged_params["api_key"] == "test-key"
        assert merged_params["base_url"] == "https://api.anthropic.com"
        assert merged_params["timeout"] == 60
        assert merged_params["max_retries"] == 2
|
|
|
|
|
|
def test_anthropic_client_params_empty_dict():
    """An empty ``client_params`` dict is stored as given and does not drop the API key."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
        llm = LLM(
            model="anthropic/claude-3-5-sonnet-20241022",
            api_key="test-key",
            client_params={},
        )

        assert isinstance(llm, AnthropicCompletion)
        assert llm.client_params == {}

        merged_params = llm._get_client_params()

        assert "api_key" in merged_params
        assert merged_params["api_key"] == "test-key"
|
|
|
|
|
|
def test_anthropic_model_detection():
    """Both supported provider prefixes must resolve to AnthropicCompletion."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    # One entry per provider prefix exercised by this test.
    anthropic_test_cases = (
        "anthropic/claude-3-5-sonnet-20241022",
        "claude/claude-3-5-sonnet-20241022",
    )

    for model_name in anthropic_test_cases:
        llm = LLM(model=model_name)
        assert isinstance(llm, AnthropicCompletion), f"Failed for model: {model_name}"
|
|
|
|
|
|
def test_anthropic_supports_stop_words():
    """An Anthropic LLM must report that it supports stop words."""
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    # Identity check: the capability flag must be the boolean True itself,
    # not just a value that compares equal to it.
    assert llm.supports_stop_words() is True
|
|
|
|
|
|
def test_anthropic_context_window_size():
    """The reported context window must fall in the range (100000, 200000]."""
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    context_size = llm.get_context_window_size()

    # Lower bound: the window should be substantial.
    # Upper bound: it must not exceed the 200k limit this test assumes.
    assert 100000 < context_size <= 200000
|
|
|
|
|
|
def test_anthropic_message_formatting():
    """The formatter must split off the system prompt and keep the dialogue turns."""
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "How are you?"},
    ]

    formatted_messages, system_message = llm._format_messages_for_anthropic(conversation)

    # The system entry is returned separately from the message list.
    assert system_message == "You are a helpful assistant."

    # What remains starts with the user turn and keeps at least the three
    # non-system entries (user, assistant, user).
    assert formatted_messages[0]["role"] == "user"
    assert len(formatted_messages) >= 3
|
|
|
|
|
|
def test_anthropic_streaming_parameter():
    """The ``stream`` flag passed to ``LLM`` must be stored as the same boolean."""
    # Streaming disabled. Identity checks are used so that 0 / 1 or other
    # merely falsy / truthy values do not pass.
    llm_no_stream = LLM(model="anthropic/claude-3-5-sonnet-20241022", stream=False)
    assert llm_no_stream.stream is False

    # Streaming enabled.
    llm_stream = LLM(model="anthropic/claude-3-5-sonnet-20241022", stream=True)
    assert llm_stream.stream is True
|
|
|
|
|
|
def test_anthropic_tool_conversion():
    """A function-style tool definition must be converted to Anthropic's tool layout."""
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    # Tool description in the nested "function" layout used as input here.
    parameter_schema = {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search query"}
        },
        "required": ["query"],
    }
    crewai_tools = [
        {
            "type": "function",
            "function": {
                "name": "test_tool",
                "description": "A test tool",
                "parameters": parameter_schema,
            },
        }
    ]

    anthropic_tools = llm._convert_tools_for_interference(crewai_tools)

    assert len(anthropic_tools) == 1
    converted = anthropic_tools[0]
    # Name and description move to the top level; the schema key must exist.
    assert converted["name"] == "test_tool"
    assert converted["description"] == "A test tool"
    assert "input_schema" in converted
|
|
|
|
|
|
def test_anthropic_environment_variable_api_key():
    """With only the environment variable set, the LLM must still build a usable client.

    NOTE(review): this checks that a client exists and exposes ``messages``;
    it does not inspect which key the client was given.
    """
    env_with_key = {"ANTHROPIC_API_KEY": "test-anthropic-key"}

    with patch.dict(os.environ, env_with_key):
        llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

        client = llm._client
        assert client is not None
        assert hasattr(client, 'messages')
|
|
|
|
|
|
def test_anthropic_token_usage_tracking():
    """Token counts on a response must be extracted, including their total."""
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    # Fake API response: one text block plus usage counters.
    fake_response = MagicMock()
    fake_response.content = [MagicMock(text="test response")]
    fake_response.usage = MagicMock(input_tokens=50, output_tokens=25)

    with patch.object(llm._client.messages, 'create', return_value=fake_response):
        result = llm.call("Hello")

        # The text of the fake block is what ``call`` hands back.
        assert result == "test response"

        usage = llm._extract_anthropic_token_usage(fake_response)
        assert usage["input_tokens"] == 50
        assert usage["output_tokens"] == 25
        # 50 input + 25 output.
        assert usage["total_tokens"] == 75
|
|
|
|
|
|
def test_anthropic_stop_sequences_sync():
    """Assigning ``stop`` must keep ``stop`` and ``stop_sequences`` identical."""
    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")

    # (value assigned to ``stop``, value both attributes must then report)
    scenarios = (
        # a list is kept as a list
        (["\nObservation:", "\nThought:"], ["\nObservation:", "\nThought:"]),
        # a bare string is wrapped in a one-element list
        ("\nFinal Answer:", ["\nFinal Answer:"]),
        # None becomes an empty list
        (None, []),
    )

    for assigned, expected in scenarios:
        llm.stop = assigned
        assert llm.stop_sequences == expected
        assert llm.stop == expected
|
|
|
|
|
|
@pytest.mark.vcr()
def test_anthropic_stop_sequences_sent_to_api():
    """A call made with stop sequences configured must still return non-empty text."""
    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")
    llm.stop = ["\nObservation:", "\nThought:"]

    reply = llm.call("Say hello in one word")

    assert reply is not None
    assert isinstance(reply, str)
    assert len(reply) > 0
|
|
|
|
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_thinking():
    """Thinking settings given to ``LLM`` must be forwarded to ``messages.create``.

    ``messages.create`` is wrapped so that its keyword arguments are recorded
    before the original method is invoked; the recorded arguments are then
    checked for the thinking config, the model name, the token limit and a
    non-empty message list.
    """
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    llm = LLM(
        model="anthropic/claude-sonnet-4-5",
        thinking={"type": "enabled", "budget_tokens": 5000},
        max_tokens=10000,
    )

    assert isinstance(llm, AnthropicCompletion)

    # Grab the original method before patching so the wrapper can delegate to it.
    original_create = llm._client.messages.create
    captured_params = {}

    def capture_and_call(**kwargs):
        captured_params.update(kwargs)
        return original_create(**kwargs)

    with patch.object(llm._client.messages, 'create', side_effect=capture_and_call):
        result = llm.call("What is the weather in Tokyo?")

    assert result is not None
    assert isinstance(result, str)
    assert len(result) > 0

    # The thinking configuration must arrive unchanged.
    assert "thinking" in captured_params
    assert captured_params["thinking"] == {"type": "enabled", "budget_tokens": 5000}

    assert captured_params["model"] == "claude-sonnet-4-5"
    assert captured_params["max_tokens"] == 10000
    assert "messages" in captured_params
    assert len(captured_params["messages"]) > 0
|
|
|
|
|
|
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_thinking_blocks_preserved_across_turns():
    """Thinking blocks from one turn must be replayed in the next API request.

    Two calls are made on the same LLM. After the first, the blocks kept in
    ``llm._previous_thinking_blocks`` are inspected. The keyword arguments of
    every ``messages.create`` call are recorded, and the second request must
    contain an assistant message whose content list includes a thinking block
    identical to the stored one.
    """
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    llm = LLM(
        model="anthropic/claude-sonnet-4-5",
        thinking={"type": "enabled", "budget_tokens": 5000},
        max_tokens=10000,
    )

    assert isinstance(llm, AnthropicCompletion)

    # Record the kwargs of every messages.create call while still delegating
    # to the original method.
    original_create = llm._client.messages.create
    captured_calls = []

    def capture_and_call(**kwargs):
        captured_calls.append(kwargs)
        return original_create(**kwargs)

    with patch.object(llm._client.messages, 'create', side_effect=capture_and_call):
        # First turn: expected to leave thinking blocks on the LLM.
        messages = [{"role": "user", "content": "What is 2+2?"}]
        first_result = llm.call(messages)

        assert first_result is not None
        assert isinstance(first_result, str)
        assert len(first_result) > 0

        assert len(llm._previous_thinking_blocks) > 0, "No thinking blocks stored after first call"
        first_thinking = llm._previous_thinking_blocks[0]
        assert first_thinking["type"] == "thinking"
        assert "thinking" in first_thinking
        assert "signature" in first_thinking

        # Remember the stored block for comparison with the second request.
        stored_thinking_content = first_thinking["thinking"]
        stored_signature = first_thinking["signature"]

        # Second turn: extend the same conversation and call again.
        messages.append({"role": "assistant", "content": first_result})
        messages.append({"role": "user", "content": "Now what is 3+3?"})
        second_result = llm.call(messages)

        assert second_result is not None
        assert isinstance(second_result, str)

    assert len(captured_calls) >= 2, f"Expected at least 2 API calls, got {len(captured_calls)}"

    second_call_messages = captured_calls[1]["messages"]

    # Expected shape: user message, assistant message, follow-up user message.
    assert len(second_call_messages) >= 2

    # First assistant message whose content is a list of blocks.
    assistant_message = next(
        (
            msg
            for msg in second_call_messages
            if msg["role"] == "assistant" and isinstance(msg.get("content"), list)
        ),
        None,
    )
    assert assistant_message is not None, "Assistant message with list content not found in second call"
    assert isinstance(assistant_message["content"], list)

    # First thinking block inside that assistant message.
    thinking_block = next(
        (
            block
            for block in assistant_message["content"]
            if isinstance(block, dict) and block.get("type") == "thinking"
        ),
        None,
    )
    assert thinking_block is not None, "Thinking block not found in assistant message content in second call"
    assert "thinking" in thinking_block
    assert "signature" in thinking_block
    # It must be the very block stored after the first call.
    assert thinking_block["thinking"] == stored_thinking_content
    assert thinking_block["signature"] == stored_signature
|
|
|
|
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_function_calling():
    """A call with a tool definition and a matching callable must yield a weather answer."""
    llm = LLM(model="anthropic/claude-sonnet-4-5")

    def get_weather(location: str) -> str:
        return f"The weather in {location} is sunny and 72°F"

    # Tool definition written directly in the name/description/input_schema layout.
    weather_schema = {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA"
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The unit of temperature"
            }
        },
        "required": ["location"]
    }
    tools = [
        {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "input_schema": weather_schema
        }
    ]

    result = llm.call(
        "What is the weather in Tokyo? Use the get_weather tool.",
        tools=tools,
        available_functions={"get_weather": get_weather},
    )

    assert result is not None
    assert isinstance(result, str)
    assert len(result) > 0
    # The answer must mention the city or the temperature produced by the tool.
    assert "tokyo" in result.lower() or "72" in result
|
|
|
|
|
|
# =============================================================================
|
|
# Tool Execution and Agent Kickoff Structured Output Tests
|
|
# =============================================================================
|
|
|
|
|
|
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_with_available_functions():
    """
    Test that the Anthropic provider executes a tool when available_functions is provided.

    Background (taken from the original test notes): this guards the fix for a
    double llm_call_completed emission, where the tool-execution path already
    emits the event and the caller must not emit it again.

    What is asserted here:
    1. The call returns a string.
    2. That string parses as JSON.
    3. The parsed object has the "plan", "steps" and "ready" keys produced by the tool.
    """
    import json

    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    # The tool simply echoes its arguments back as a JSON document.
    def create_reasoning_plan(plan: str, steps: list, ready: bool) -> str:
        """Create a reasoning plan with steps."""
        return json.dumps({"plan": plan, "steps": steps, "ready": ready})

    plan_schema = {
        "type": "object",
        "properties": {
            "plan": {
                "type": "string",
                "description": "High-level plan description"
            },
            "steps": {
                "type": "array",
                "items": {"type": "object"},
                "description": "List of steps to execute"
            },
            "ready": {
                "type": "boolean",
                "description": "Whether the plan is ready to execute"
            }
        },
        "required": ["plan", "steps", "ready"]
    }
    tools = [
        {
            "name": "create_reasoning_plan",
            "description": "Create a structured reasoning plan for completing a task",
            "input_schema": plan_schema
        }
    ]

    result = llm.call(
        messages=[{"role": "user", "content": "Create a simple plan to say hello. Use the create_reasoning_plan tool."}],
        tools=tools,
        available_functions={"create_reasoning_plan": create_reasoning_plan},
    )

    assert result is not None
    assert isinstance(result, str)

    # json.loads raises if the result is not valid JSON, failing the test.
    parsed_result = json.loads(result)
    for required_key in ("plan", "steps", "ready"):
        assert required_key in parsed_result
|
|
|
|
|
|
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_anthropic_tool_execution_returns_tool_result_directly():
    """
    Test that, with available_functions provided, ``call`` returns the tool's own
    output and the tool runs exactly once.
    """
    llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

    # Incremented by the tool on every invocation.
    call_count = 0

    def simple_calculator(operation: str, a: int, b: int) -> str:
        """Perform a simple calculation."""
        nonlocal call_count
        call_count += 1
        if operation == "add":
            return str(a + b)
        if operation == "multiply":
            return str(a * b)
        return "Unknown operation"

    calculator_schema = {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": ["add", "multiply"],
                "description": "The operation to perform"
            },
            "a": {"type": "integer", "description": "First number"},
            "b": {"type": "integer", "description": "Second number"}
        },
        "required": ["operation", "a", "b"]
    }
    tools = [
        {
            "name": "simple_calculator",
            "description": "Perform simple math operations",
            "input_schema": calculator_schema
        }
    ]

    result = llm.call(
        messages=[{"role": "user", "content": "Calculate 5 + 3 using the simple_calculator tool with operation 'add'."}],
        tools=tools,
        available_functions={"simple_calculator": simple_calculator},
    )

    # Exactly one tool invocation.
    assert call_count == 1, f"Expected tool to be called once, got {call_count}"

    # The returned value is the tool's string output for 5 + 3.
    assert result == "8", f"Expected '8' but got '{result}'"
|
|
|
|
|
|
@pytest.mark.vcr()
def test_anthropic_agent_kickoff_structured_output_without_tools():
    """
    Test that ``Agent.kickoff`` with a ``response_format`` returns a populated
    pydantic object when the agent has no tools.
    """
    from pydantic import BaseModel, Field

    class AnalysisResult(BaseModel):
        """Structured output for analysis results."""

        topic: str = Field(description="The topic analyzed")
        key_points: list[str] = Field(description="Key insights from the analysis")
        summary: str = Field(description="Brief summary of findings")

    analyst = Agent(
        role="Analyst",
        goal="Provide structured analysis on topics",
        backstory="You are an expert analyst who provides clear, structured insights.",
        llm=LLM(model="anthropic/claude-3-5-haiku-20241022"),
        tools=[],
        verbose=True,
    )

    result = analyst.kickoff(
        messages="Analyze the benefits of remote work briefly. Keep it concise.",
        response_format=AnalysisResult,
    )

    structured = result.pydantic
    assert structured is not None, "Expected pydantic output but got None"
    assert isinstance(structured, AnalysisResult), f"Expected AnalysisResult but got {type(result.pydantic)}"
    # Every field must carry content.
    assert structured.topic, "Topic should not be empty"
    assert len(structured.key_points) > 0, "Should have at least one key point"
    assert structured.summary, "Summary should not be empty"
|
|
|
|
|
|
@pytest.mark.vcr()
def test_anthropic_agent_kickoff_structured_output_with_tools():
    """
    Test that ``Agent.kickoff`` with a ``response_format`` returns a populated
    pydantic object when the agent is equipped with a tool.
    """
    from pydantic import BaseModel, Field
    from crewai.tools import tool

    class CalculationResult(BaseModel):
        """Structured output for calculation results."""

        operation: str = Field(description="The mathematical operation performed")
        result: int = Field(description="The result of the calculation")
        explanation: str = Field(description="Brief explanation of the calculation")

    @tool
    def add_numbers(a: int, b: int) -> int:
        """Add two numbers together and return the sum."""
        return a + b

    calculator = Agent(
        role="Calculator",
        goal="Perform calculations using available tools",
        backstory="You are a calculator assistant that uses tools to compute results.",
        llm=LLM(model="anthropic/claude-3-5-haiku-20241022"),
        tools=[add_numbers],
        verbose=True,
    )

    result = calculator.kickoff(
        messages="Calculate 15 + 27 using your add_numbers tool. Report the result.",
        response_format=CalculationResult,
    )

    structured = result.pydantic
    assert structured is not None, "Expected pydantic output but got None"
    assert isinstance(structured, CalculationResult), f"Expected CalculationResult but got {type(result.pydantic)}"
    # 15 + 27
    assert structured.result == 42, f"Expected result 42 but got {result.pydantic.result}"
    assert structured.operation, "Operation should not be empty"
    assert structured.explanation, "Explanation should not be empty"
|
|
|
|
|
|
@pytest.mark.vcr()
def test_anthropic_cached_prompt_tokens():
    """
    Make two calls that share one large system prompt and check that the
    usage summary reports a non-zero cached_prompt_tokens count.

    Each call sends its user text as a single content block tagged with an
    ephemeral cache_control marker.
    """
    # Anthropic requires cache_control blocks and >=1024 tokens for caching,
    # hence the repeated filler sentence.
    filler = "This is padding text to ensure the prompt is large enough for caching. " * 80
    system_prompt = f"You are a helpful assistant. {filler}"

    def _conversation(question: str):
        """Return the system message plus one cache-marked user message."""
        return [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question, "cache_control": {"type": "ephemeral"}}
                ],
            },
        ]

    llm = LLM(model="anthropic/claude-sonnet-4-5-20250929")

    # First iteration is expected to create the cache, the second to read it.
    for question in ("Say hello in one word.", "Say goodbye in one word."):
        llm.call(_conversation(question))

    summary = llm.get_token_usage_summary()
    assert summary.total_tokens > 0
    assert summary.prompt_tokens > 0
    assert summary.completion_tokens > 0
    assert summary.successful_requests == 2
    # The second call should have cached prompt tokens
    assert summary.cached_prompt_tokens > 0
|
|
|
|
|
|
@pytest.mark.vcr()
def test_anthropic_streaming_cached_prompt_tokens():
    """
    Streaming variant: two calls on an LLM created with stream=True share one
    large system prompt, and the usage summary must report a non-zero
    cached_prompt_tokens count.
    """
    filler = "This is padding text to ensure the prompt is large enough for caching. " * 80
    system_prompt = f"You are a helpful assistant. {filler}"

    def _conversation(question: str):
        """Return the system message plus one cache-marked user message."""
        return [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question, "cache_control": {"type": "ephemeral"}}
                ],
            },
        ]

    llm = LLM(model="anthropic/claude-sonnet-4-5-20250929", stream=True)

    # First iteration is expected to create the cache, the second to read it.
    for question in ("Say hello in one word.", "Say goodbye in one word."):
        llm.call(_conversation(question))

    summary = llm.get_token_usage_summary()
    assert summary.total_tokens > 0
    assert summary.successful_requests == 2
    # The second call should have cached prompt tokens
    assert summary.cached_prompt_tokens > 0
|
|
|
|
|
|
@pytest.mark.vcr()
def test_anthropic_cached_prompt_tokens_with_tools():
    """
    Two tool-enabled calls share one large system prompt; the usage summary
    must count both requests and report a non-zero cached_prompt_tokens.
    """
    filler = "This is padding text to ensure the prompt is large enough for caching. " * 80
    system_prompt = f"You are a helpful assistant that uses tools. {filler}"

    def get_weather(location: str) -> str:
        return f"The weather in {location} is sunny and 72°F"

    weather_schema = {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city name"
            }
        },
        "required": ["location"],
    }
    tools = [
        {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "input_schema": weather_schema,
        }
    ]

    llm = LLM(model="anthropic/claude-sonnet-4-5-20250929")

    # Same system prompt + tools on both calls: the first is expected to create
    # the cache, the second to hit it.
    for question in ("What is the weather in Tokyo?", "What is the weather in Paris?"):
        llm.call(
            [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question, "cache_control": {"type": "ephemeral"}}
                    ],
                },
            ],
            tools=tools,
            available_functions={"get_weather": get_weather},
        )

    summary = llm.get_token_usage_summary()
    assert summary.total_tokens > 0
    assert summary.prompt_tokens > 0
    assert summary.successful_requests == 2
    # The second call should have cached prompt tokens
    assert summary.cached_prompt_tokens > 0
|
|
|
|
|
|
# ---- Tool Search Tool Tests ----
|
|
|
|
|
|
def test_tool_search_true_injects_bm25_and_defer_loading():
    """With tool_search=True, prepared params lead with a bm25 search tool and defer the rest."""

    def _fn_tool(name: str, description: str, arg: str) -> dict:
        """Build a function-style tool definition with one required string argument."""
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": {arg: {"type": "string"}},
                    "required": [arg],
                },
            },
        }

    llm = LLM(model="anthropic/claude-sonnet-4-5", tool_search=True)
    crewai_tools = [
        _fn_tool("get_weather", "Get weather for a location", "location"),
        _fn_tool("calculator", "Perform math calculations", "expression"),
    ]

    messages, system = llm._format_messages_for_anthropic(
        [{"role": "user", "content": "Hello"}]
    )
    params = llm._prepare_completion_params(messages, system, crewai_tools)

    prepared = params["tools"]
    # Should have 3 tools: tool_search + 2 regular
    assert len(prepared) == 3

    # The search tool comes first and carries no input_schema.
    search_tool = prepared[0]
    assert search_tool["type"] == "tool_search_tool_bm25_20251119"
    assert search_tool["name"] == "tool_search_tool_bm25"
    assert "input_schema" not in search_tool

    # Every remaining tool must be flagged defer_loading=True.
    for t in prepared[1:]:
        assert t.get("defer_loading") is True, f"Tool {t['name']} missing defer_loading"
|
|
|
|
|
|
def test_tool_search_regex_config():
    """An AnthropicToolSearchConfig(type="regex") should select the regex search tool."""
    from crewai.llms.providers.anthropic.completion import AnthropicToolSearchConfig

    def _fn_tool(name: str, description: str) -> dict:
        """Build a function-style tool definition with a required string arg ``q``."""
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": {"q": {"type": "string"}},
                    "required": ["q"],
                },
            },
        }

    regex_config = AnthropicToolSearchConfig(type="regex")
    llm = LLM(model="anthropic/claude-sonnet-4-5", tool_search=regex_config)

    crewai_tools = [
        _fn_tool("tool_a", "First tool"),
        _fn_tool("tool_b", "Second tool"),
    ]

    messages, system = llm._format_messages_for_anthropic(
        [{"role": "user", "content": "Hello"}]
    )
    params = llm._prepare_completion_params(messages, system, crewai_tools)

    # The injected search tool is expected at the front of the list.
    search_tool = params["tools"][0]
    assert search_tool["type"] == "tool_search_tool_regex_20251119"
    assert search_tool["name"] == "tool_search_tool_regex"
|
|
|
|
|
|
def test_tool_search_disabled_by_default():
    """Without a tool_search argument, no search tool and no defer_loading flag appear."""
    llm = LLM(model="anthropic/claude-sonnet-4-5")

    parameters_schema = {
        "type": "object",
        "properties": {"q": {"type": "string"}},
        "required": ["q"],
    }
    crewai_tools = [
        {
            "type": "function",
            "function": {
                "name": "test_tool",
                "description": "A test tool",
                "parameters": parameters_schema,
            },
        },
    ]

    messages, system = llm._format_messages_for_anthropic(
        [{"role": "user", "content": "Hello"}]
    )
    params = llm._prepare_completion_params(messages, system, crewai_tools)

    prepared = params["tools"]
    assert len(prepared) == 1

    search_tool_types = (
        "tool_search_tool_bm25_20251119",
        "tool_search_tool_regex_20251119",
    )
    for t in prepared:
        assert t.get("type", "") not in search_tool_types
        assert "defer_loading" not in t
|
|
|
|
|
|
def test_tool_search_no_duplicate_when_manually_provided():
    """A search tool already present in the tool list must not be joined by an injected one."""
    llm = LLM(model="anthropic/claude-sonnet-4-5", tool_search=True)

    # Caller-supplied regex search tool, followed by one ordinary function tool.
    manual_search_tool = {
        "type": "tool_search_tool_regex_20251119",
        "name": "tool_search_tool_regex",
    }
    function_tool = {
        "type": "function",
        "function": {
            "name": "test_tool",
            "description": "A test tool",
            "parameters": {
                "type": "object",
                "properties": {"q": {"type": "string"}},
                "required": ["q"],
            },
        },
    }
    tools_with_search = [manual_search_tool, function_tool]

    messages, system = llm._format_messages_for_anthropic(
        [{"role": "user", "content": "Hello"}]
    )
    params = llm._prepare_completion_params(messages, system, tools_with_search)

    found = []
    for t in params["tools"]:
        if t.get("type", "").startswith("tool_search_tool"):
            found.append(t)

    # Should only have 1 tool search tool (the user's manual one)
    assert len(found) == 1
    assert found[0]["type"] == "tool_search_tool_regex_20251119"
|
|
|
|
|
|
def test_tool_search_passthrough_preserves_tool_search_type():
    """_convert_tools_for_interference must return a tool search entry unchanged."""
    llm = LLM(model="anthropic/claude-sonnet-4-5")

    search_entry = {
        "type": "tool_search_tool_regex_20251119",
        "name": "tool_search_tool_regex",
    }
    weather_entry = {
        "name": "get_weather",
        "description": "Get weather",
        "input_schema": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    }

    converted = llm._convert_tools_for_interference([search_entry, weather_entry])
    assert len(converted) == 2

    first, second = converted[0], converted[1]
    # Compared against a fresh literal so in-place mutation of the input is caught too.
    assert first == {
        "type": "tool_search_tool_regex_20251119",
        "name": "tool_search_tool_regex",
    }
    # Regular tool should be preserved
    assert second["name"] == "get_weather"
    assert "input_schema" in second
|
|
|
|
|
|
def test_tool_search_single_tool_skips_search_and_forces_choice():
    """A lone tool with tool_search=True gets a forced tool_choice, and no
    search tool or defer_loading flag is added."""
    llm = LLM(model="anthropic/claude-sonnet-4-5", tool_search=True)

    only_tool = {
        "type": "function",
        "function": {
            "name": "test_tool",
            "description": "A test tool",
            "parameters": {
                "type": "object",
                "properties": {"q": {"type": "string"}},
                "required": ["q"],
            },
        },
    }

    messages, system = llm._format_messages_for_anthropic(
        [{"role": "user", "content": "Hello"}]
    )
    params = llm._prepare_completion_params(
        messages,
        system,
        [only_tool],
        available_functions={"test_tool": lambda q: "result"},
    )

    # Single tool — tool_search skipped, tool_choice forced as normal
    assert "tool_choice" in params
    assert params["tool_choice"]["name"] == "test_tool"

    prepared = params["tools"]

    # Neither search-tool variant may show up among the prepared tools.
    seen_types = [t.get("type", "") for t in prepared]
    assert "tool_search_tool_bm25_20251119" not in seen_types
    assert "tool_search_tool_regex_20251119" not in seen_types

    # No defer_loading on the single tool
    assert "defer_loading" not in prepared[0]
|
|
|
|
|
|
def test_tool_search_via_llm_class():
    """The tool_search argument given to LLM(...) must surface on the created instance."""
    from crewai.llms.providers.anthropic.completion import (
        AnthropicCompletion,
        AnthropicToolSearchConfig,
    )

    model_id = "anthropic/claude-sonnet-4-5"

    # tool_search=True: instance exposes a config whose type is "bm25".
    bm25_llm = LLM(model=model_id, tool_search=True)
    assert isinstance(bm25_llm, AnthropicCompletion)
    assert bm25_llm.tool_search is not None
    assert bm25_llm.tool_search.type == "bm25"

    # Explicit config object: its type is carried through.
    regex_llm = LLM(
        model=model_id,
        tool_search=AnthropicToolSearchConfig(type="regex"),
    )
    assert regex_llm.tool_search is not None
    assert regex_llm.tool_search.type == "regex"

    # Argument omitted: attribute stays None.
    plain_llm = LLM(model=model_id)
    assert plain_llm.tool_search is None
|
|
|
|
|
|
# Many tools shared by the VCR tests below
|
|
_MANY_TOOLS = [
|
|
{
|
|
"name": name,
|
|
"description": desc,
|
|
"input_schema": {
|
|
"type": "object",
|
|
"properties": {"input": {"type": "string", "description": f"Input for {name}"}},
|
|
"required": ["input"],
|
|
},
|
|
}
|
|
for name, desc in [
|
|
("get_weather", "Get current weather conditions for a specified location"),
|
|
("search_files", "Search through files in the workspace by name or content"),
|
|
("read_database", "Read records from a database table with optional filtering"),
|
|
("write_database", "Write or update records in a database table"),
|
|
("send_email", "Send an email message to one or more recipients"),
|
|
("read_email", "Read emails from inbox with filtering options"),
|
|
("create_ticket", "Create a new support ticket in the ticketing system"),
|
|
("update_ticket", "Update an existing support ticket status or description"),
|
|
("list_users", "List all users in the system with optional filters"),
|
|
("get_user_profile", "Get detailed profile information for a specific user"),
|
|
("deploy_service", "Deploy a service to the specified environment"),
|
|
("rollback_service", "Rollback a service deployment to a previous version"),
|
|
("get_service_logs", "Get service logs filtered by time range and severity"),
|
|
("run_sql_query", "Run a read-only SQL query against the analytics database"),
|
|
("create_dashboard", "Create a new monitoring dashboard with widgets"),
|
|
]
|
|
]
|
|
|
|
|
|
@pytest.mark.vcr()
def test_tool_search_discovers_and_calls_tool():
    """With tool_search=True and many tools, the call should come back as a
    list of blocks that includes one named get_weather."""
    llm = LLM(model="anthropic/claude-sonnet-4-5", tool_search=True)

    blocks = llm.call(
        "What is the weather in Tokyo?",
        tools=_MANY_TOOLS,
    )

    # Should return tool_use blocks (list) since no available_functions provided
    assert isinstance(blocks, list)
    assert len(blocks) >= 1

    # Blocks without a ``name`` attribute contribute None and are ignored.
    returned_names = []
    for block in blocks:
        returned_names.append(getattr(block, "name", None))
    assert "get_weather" in returned_names
|
|
|
|
|
|
@pytest.mark.vcr()
def test_tool_search_saves_input_tokens():
    """The same question over the same tool list should report fewer prompt
    tokens when the LLM is created with tool_search=True."""
    question = "What is the weather in Tokyo?"

    # Baseline: no tool search, all 15 tools are sent upfront.
    llm_no_search = LLM(model="anthropic/claude-sonnet-4-5")
    llm_no_search.call(question, tools=_MANY_TOOLS)
    baseline_tokens = llm_no_search.get_token_usage_summary().prompt_tokens

    # Tool search enabled: tools are deferred.
    llm_search = LLM(model="anthropic/claude-sonnet-4-5", tool_search=True)
    llm_search.call(question, tools=_MANY_TOOLS)
    search_tokens = llm_search.get_token_usage_summary().prompt_tokens

    # Tool search should use fewer input tokens
    assert search_tokens < baseline_tokens, (
        f"Expected tool_search ({search_tokens}) to use fewer input tokens "
        f"than no search ({baseline_tokens})"
    )
|