mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 04:18:35 +00:00
feat: Add extended thinking support for Anthropic Claude
- Add thinking parameter to AnthropicCompletion.__init__ - Include thinking parameter in API calls via _prepare_completion_params - Thinking blocks are automatically preserved in tool use conversations - Add comprehensive tests for extended thinking with tool use - Fixes #3964 Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -50,6 +50,7 @@ class AnthropicCompletion(BaseLLM):
|
||||
top_p: float | None = None,
|
||||
stop_sequences: list[str] | None = None,
|
||||
stream: bool = False,
|
||||
thinking: dict[str, Any] | None = None,
|
||||
client_params: dict[str, Any] | None = None,
|
||||
interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
|
||||
**kwargs: Any,
|
||||
@@ -67,6 +68,7 @@ class AnthropicCompletion(BaseLLM):
|
||||
top_p: Nucleus sampling parameter
|
||||
stop_sequences: Stop sequences (Anthropic uses stop_sequences, not stop)
|
||||
stream: Enable streaming responses
|
||||
thinking: Extended thinking configuration (e.g., {'type': 'enabled', 'budget_tokens': 5000})
|
||||
client_params: Additional parameters for the Anthropic client
|
||||
interceptor: HTTP interceptor for modifying requests/responses at transport level.
|
||||
**kwargs: Additional parameters
|
||||
@@ -89,6 +91,7 @@ class AnthropicCompletion(BaseLLM):
|
||||
self.top_p = top_p
|
||||
self.stream = stream
|
||||
self.stop_sequences = stop_sequences or []
|
||||
self.thinking = thinking
|
||||
|
||||
# Model-specific settings
|
||||
self.is_claude_3 = "claude-3" in model.lower()
|
||||
@@ -248,6 +251,10 @@ class AnthropicCompletion(BaseLLM):
|
||||
if self.stop_sequences:
|
||||
params["stop_sequences"] = self.stop_sequences
|
||||
|
||||
# Add extended thinking configuration if set
|
||||
if self.thinking is not None:
|
||||
params["thinking"] = self.thinking
|
||||
|
||||
# Handle tools for Claude 3+
|
||||
if tools and self.supports_tools:
|
||||
params["tools"] = self._convert_tools_for_interference(tools)
|
||||
|
||||
@@ -698,3 +698,253 @@ def test_anthropic_stop_sequences_sent_to_api():
|
||||
assert result is not None
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_anthropic_extended_thinking_parameter():
|
||||
"""
|
||||
Test that AnthropicCompletion accepts and stores the thinking parameter
|
||||
"""
|
||||
thinking_config = {"type": "enabled", "budget_tokens": 5000}
|
||||
|
||||
llm = LLM(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
thinking=thinking_config
|
||||
)
|
||||
|
||||
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
|
||||
assert isinstance(llm, AnthropicCompletion)
|
||||
assert llm.thinking == thinking_config
|
||||
|
||||
|
||||
def test_anthropic_extended_thinking_added_to_api_call():
|
||||
"""
|
||||
Test that the thinking parameter is added to the API call parameters
|
||||
"""
|
||||
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
|
||||
|
||||
thinking_config = {"type": "enabled", "budget_tokens": 5000}
|
||||
completion = AnthropicCompletion(
|
||||
model="claude-3-5-sonnet-20241022",
|
||||
thinking=thinking_config
|
||||
)
|
||||
|
||||
# Test _prepare_completion_params includes thinking
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
params = completion._prepare_completion_params(messages)
|
||||
|
||||
assert "thinking" in params
|
||||
assert params["thinking"] == thinking_config
|
||||
|
||||
|
||||
def test_anthropic_extended_thinking_not_added_when_none():
|
||||
"""
|
||||
Test that the thinking parameter is not added to API call when None
|
||||
"""
|
||||
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
|
||||
|
||||
completion = AnthropicCompletion(model="claude-3-5-sonnet-20241022")
|
||||
|
||||
# Test _prepare_completion_params does not include thinking when None
|
||||
messages = [{"role": "user", "content": "Hello"}]
|
||||
params = completion._prepare_completion_params(messages)
|
||||
|
||||
assert "thinking" not in params
|
||||
|
||||
|
||||
def test_anthropic_extended_thinking_with_tool_use_preserves_thinking_blocks():
|
||||
"""
|
||||
Test that thinking blocks are preserved in tool use conversation flow
|
||||
"""
|
||||
from unittest.mock import Mock, patch
|
||||
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
|
||||
from anthropic.types.tool_use_block import ToolUseBlock
|
||||
from anthropic.types import ThinkingBlock
|
||||
|
||||
thinking_config = {"type": "enabled", "budget_tokens": 5000}
|
||||
completion = AnthropicCompletion(
|
||||
model="claude-3-5-sonnet-20241022",
|
||||
thinking=thinking_config
|
||||
)
|
||||
|
||||
# Mock tool function
|
||||
def mock_calculator(operation: str, a: int, b: int) -> str:
|
||||
if operation == "add":
|
||||
return str(a + b)
|
||||
return "0"
|
||||
|
||||
available_functions = {"calculator": mock_calculator}
|
||||
|
||||
# Mock the Anthropic client responses
|
||||
with patch.object(completion.client.messages, 'create') as mock_create:
|
||||
# Mock thinking block
|
||||
mock_thinking_block = Mock(spec=ThinkingBlock)
|
||||
mock_thinking_block.type = "thinking"
|
||||
mock_thinking_block.thinking = "I need to use the calculator tool to add 5 and 3"
|
||||
|
||||
# Mock tool use block
|
||||
mock_tool_use = Mock(spec=ToolUseBlock)
|
||||
mock_tool_use.id = "tool_456"
|
||||
mock_tool_use.name = "calculator"
|
||||
mock_tool_use.input = {"operation": "add", "a": 5, "b": 3}
|
||||
mock_tool_use.type = "tool_use"
|
||||
|
||||
# Mock initial response with thinking block + tool use
|
||||
mock_initial_response = Mock()
|
||||
mock_initial_response.content = [mock_thinking_block, mock_tool_use]
|
||||
mock_initial_response.usage = Mock()
|
||||
mock_initial_response.usage.input_tokens = 100
|
||||
mock_initial_response.usage.output_tokens = 50
|
||||
|
||||
# Mock final response after tool result
|
||||
mock_text_block = Mock()
|
||||
mock_text_block.configure_mock(text="The sum of 5 and 3 is 8.")
|
||||
|
||||
mock_final_response = Mock()
|
||||
mock_final_response.content = [mock_text_block]
|
||||
mock_final_response.usage = Mock()
|
||||
mock_final_response.usage.input_tokens = 150
|
||||
mock_final_response.usage.output_tokens = 75
|
||||
|
||||
# Configure mock to return different responses on successive calls
|
||||
mock_create.side_effect = [mock_initial_response, mock_final_response]
|
||||
|
||||
# Test the call
|
||||
messages = [{"role": "user", "content": "What is 5 + 3?"}]
|
||||
result = completion.call(
|
||||
messages=messages,
|
||||
available_functions=available_functions
|
||||
)
|
||||
|
||||
# Verify the result contains the final response
|
||||
assert "sum of 5 and 3 is 8" in result
|
||||
|
||||
# Verify that two API calls were made (initial + follow-up)
|
||||
assert mock_create.call_count == 2
|
||||
|
||||
# Verify the first call includes thinking parameter
|
||||
first_call_kwargs = mock_create.call_args_list[0][1]
|
||||
assert "thinking" in first_call_kwargs
|
||||
assert first_call_kwargs["thinking"] == thinking_config
|
||||
|
||||
# Verify the second call includes thinking blocks in assistant message
|
||||
second_call_kwargs = mock_create.call_args_list[1][1]
|
||||
messages_in_second_call = second_call_kwargs["messages"]
|
||||
|
||||
# Should have original user message + assistant (with thinking + tool_use) + user tool result
|
||||
assert len(messages_in_second_call) == 3
|
||||
assert messages_in_second_call[0]["role"] == "user"
|
||||
assert messages_in_second_call[1]["role"] == "assistant"
|
||||
assert messages_in_second_call[2]["role"] == "user"
|
||||
|
||||
# Verify assistant message content includes both thinking and tool_use blocks
|
||||
assistant_content = messages_in_second_call[1]["content"]
|
||||
assert len(assistant_content) == 2
|
||||
assert assistant_content[0] == mock_thinking_block # Thinking block preserved
|
||||
assert assistant_content[1] == mock_tool_use # Tool use block preserved
|
||||
|
||||
# Verify tool result format
|
||||
tool_result = messages_in_second_call[2]["content"][0]
|
||||
assert tool_result["type"] == "tool_result"
|
||||
assert tool_result["tool_use_id"] == "tool_456"
|
||||
assert "8" in tool_result["content"]
|
||||
|
||||
# Verify the second call also includes thinking parameter
|
||||
assert "thinking" in second_call_kwargs
|
||||
assert second_call_kwargs["thinking"] == thinking_config
|
||||
|
||||
|
||||
def test_anthropic_extended_thinking_with_multiple_tool_calls():
|
||||
"""
|
||||
Test that thinking blocks are preserved across multiple tool calls
|
||||
"""
|
||||
from unittest.mock import Mock, patch
|
||||
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
|
||||
from anthropic.types.tool_use_block import ToolUseBlock
|
||||
from anthropic.types import ThinkingBlock
|
||||
|
||||
thinking_config = {"type": "enabled", "budget_tokens": 10000}
|
||||
completion = AnthropicCompletion(
|
||||
model="claude-3-5-sonnet-20241022",
|
||||
thinking=thinking_config
|
||||
)
|
||||
|
||||
# Mock tool functions
|
||||
def mock_search(query: str) -> str:
|
||||
return f"Search results for: {query}"
|
||||
|
||||
def mock_summarize(text: str) -> str:
|
||||
return f"Summary: {text[:20]}..."
|
||||
|
||||
available_functions = {
|
||||
"search": mock_search,
|
||||
"summarize": mock_summarize
|
||||
}
|
||||
|
||||
# Mock the Anthropic client responses
|
||||
with patch.object(completion.client.messages, 'create') as mock_create:
|
||||
# Mock thinking block
|
||||
mock_thinking_block = Mock(spec=ThinkingBlock)
|
||||
mock_thinking_block.type = "thinking"
|
||||
mock_thinking_block.thinking = "I should search first, then summarize"
|
||||
|
||||
# Mock first tool use
|
||||
mock_tool_use_1 = Mock(spec=ToolUseBlock)
|
||||
mock_tool_use_1.id = "tool_1"
|
||||
mock_tool_use_1.name = "search"
|
||||
mock_tool_use_1.input = {"query": "AI agents"}
|
||||
mock_tool_use_1.type = "tool_use"
|
||||
|
||||
# Mock second tool use
|
||||
mock_tool_use_2 = Mock(spec=ToolUseBlock)
|
||||
mock_tool_use_2.id = "tool_2"
|
||||
mock_tool_use_2.name = "summarize"
|
||||
mock_tool_use_2.input = {"text": "Search results for: AI agents"}
|
||||
mock_tool_use_2.type = "tool_use"
|
||||
|
||||
# Mock initial response with thinking + multiple tool uses
|
||||
mock_initial_response = Mock()
|
||||
mock_initial_response.content = [mock_thinking_block, mock_tool_use_1, mock_tool_use_2]
|
||||
mock_initial_response.usage = Mock()
|
||||
mock_initial_response.usage.input_tokens = 100
|
||||
mock_initial_response.usage.output_tokens = 50
|
||||
|
||||
# Mock final response
|
||||
mock_text_block = Mock()
|
||||
mock_text_block.configure_mock(text="Here's the summary of AI agents research.")
|
||||
|
||||
mock_final_response = Mock()
|
||||
mock_final_response.content = [mock_text_block]
|
||||
mock_final_response.usage = Mock()
|
||||
mock_final_response.usage.input_tokens = 200
|
||||
mock_final_response.usage.output_tokens = 100
|
||||
|
||||
mock_create.side_effect = [mock_initial_response, mock_final_response]
|
||||
|
||||
# Test the call
|
||||
messages = [{"role": "user", "content": "Research AI agents and summarize"}]
|
||||
result = completion.call(
|
||||
messages=messages,
|
||||
available_functions=available_functions
|
||||
)
|
||||
|
||||
# Verify result
|
||||
assert "summary of AI agents" in result
|
||||
|
||||
# Verify two API calls
|
||||
assert mock_create.call_count == 2
|
||||
|
||||
# Verify the second call preserves thinking block and all tool uses
|
||||
second_call_kwargs = mock_create.call_args_list[1][1]
|
||||
messages_in_second_call = second_call_kwargs["messages"]
|
||||
|
||||
assistant_content = messages_in_second_call[1]["content"]
|
||||
assert len(assistant_content) == 3 # thinking + 2 tool uses
|
||||
assert assistant_content[0] == mock_thinking_block
|
||||
assert assistant_content[1] == mock_tool_use_1
|
||||
assert assistant_content[2] == mock_tool_use_2
|
||||
|
||||
# Verify tool results
|
||||
tool_results = messages_in_second_call[2]["content"]
|
||||
assert len(tool_results) == 2
|
||||
assert tool_results[0]["tool_use_id"] == "tool_1"
|
||||
assert tool_results[1]["tool_use_id"] == "tool_2"
|
||||
|
||||
Reference in New Issue
Block a user