feat(bedrock): add support for Anthropic Claude V4 models

- Add _strip_region_prefix() to handle us./eu./apac./global. prefixes
- Update get_context_window_size() with V4 model entries (200K tokens)
- Update supports_multimodal() to detect V4 models
- Update _is_nova_model() to use region prefix stripping
- Add V4 model context windows to LLM_CONTEXT_WINDOW_SIZES in llm.py
- Add 11 comprehensive tests covering V4 model detection, context windows,
  multimodal support, region prefixes, function calling, and regression

Closes #4700

Co-authored-by: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-03-04 14:48:43 +00:00
parent ad82e52d39
commit d0da1149f0
3 changed files with 291 additions and 11 deletions

View File

@@ -248,6 +248,14 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = {
"us.amazon.nova-pro-v1:0": 300000,
"us.amazon.nova-micro-v1:0": 128000,
"us.amazon.nova-lite-v1:0": 300000,
# Claude 4.x (us-prefixed)
"us.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
"us.anthropic.claude-sonnet-4-20250514-v1:0": 200000,
"us.anthropic.claude-opus-4-5-20251101-v1:0": 200000,
"us.anthropic.claude-opus-4-1-20250805-v1:0": 200000,
"us.anthropic.claude-opus-4-20250514-v1:0": 200000,
"us.anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
# Claude 3.x (us-prefixed)
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": 200000,
"us.anthropic.claude-3-5-haiku-20241022-v1:0": 200000,
"us.anthropic.claude-3-5-sonnet-20241022-v2:0": 200000,
@@ -263,11 +271,20 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = {
"us.meta.llama3-1-70b-instruct-v1:0": 128000,
"us.meta.llama3-3-70b-instruct-v1:0": 128000,
"us.meta.llama3-1-405b-instruct-v1:0": 128000,
# Claude 4.x (eu-prefixed)
"eu.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
"eu.anthropic.claude-sonnet-4-20250514-v1:0": 200000,
"eu.anthropic.claude-opus-4-20250514-v1:0": 200000,
# Claude 3.x (eu-prefixed)
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0": 200000,
"eu.anthropic.claude-3-sonnet-20240229-v1:0": 200000,
"eu.anthropic.claude-3-haiku-20240307-v1:0": 200000,
"eu.meta.llama3-2-3b-instruct-v1:0": 131000,
"eu.meta.llama3-2-1b-instruct-v1:0": 131000,
# Claude 4.x (apac-prefixed)
"apac.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
"apac.anthropic.claude-sonnet-4-20250514-v1:0": 200000,
# Claude 3.x (apac-prefixed)
"apac.anthropic.claude-3-5-sonnet-20240620-v1:0": 200000,
"apac.anthropic.claude-3-5-sonnet-20241022-v2:0": 200000,
"apac.anthropic.claude-3-sonnet-20240229-v1:0": 200000,
@@ -275,6 +292,14 @@ LLM_CONTEXT_WINDOW_SIZES: Final[dict[str, int]] = {
"amazon.nova-pro-v1:0": 300000,
"amazon.nova-micro-v1:0": 128000,
"amazon.nova-lite-v1:0": 300000,
# Claude 4.x (no region prefix)
"anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
"anthropic.claude-sonnet-4-20250514-v1:0": 200000,
"anthropic.claude-opus-4-5-20251101-v1:0": 200000,
"anthropic.claude-opus-4-1-20250805-v1:0": 200000,
"anthropic.claude-opus-4-20250514-v1:0": 200000,
"anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
# Claude 3.x (no region prefix)
"anthropic.claude-3-5-sonnet-20240620-v1:0": 200000,
"anthropic.claude-3-5-haiku-20241022-v1:0": 200000,
"anthropic.claude-3-5-sonnet-20241022-v2:0": 200000,

View File

@@ -2038,18 +2038,47 @@ class BedrockCompletion(BaseLLM):
"""Check if the model supports stop words."""
return True
@staticmethod
def _strip_region_prefix(model: str) -> str:
"""Strip AWS region prefix from model ID.
Bedrock model IDs can be prefixed with region qualifiers like
``us.``, ``eu.``, ``apac.``, or ``global.`` for cross-region
inference. This helper returns the base model ID so that
capability look-ups work regardless of the prefix.
Args:
model: Full model ID, e.g. ``us.anthropic.claude-sonnet-4-20250514-v1:0``
Returns:
Base model ID, e.g. ``anthropic.claude-sonnet-4-20250514-v1:0``
"""
region_prefixes = ("us.", "eu.", "apac.", "global.")
for prefix in region_prefixes:
if model.startswith(prefix):
return model[len(prefix):]
return model
def get_context_window_size(self) -> int:
"""Get the context window size for the model."""
from crewai.llm import CONTEXT_WINDOW_USAGE_RATIO
# Context window sizes for common Bedrock models
context_windows = {
# Claude 4.x models
"anthropic.claude-sonnet-4-5": 200000,
"anthropic.claude-sonnet-4": 200000,
"anthropic.claude-opus-4-5": 200000,
"anthropic.claude-opus-4-1": 200000,
"anthropic.claude-opus-4": 200000,
"anthropic.claude-haiku-4-5": 200000,
# Claude 3.x models
"anthropic.claude-3-7-sonnet": 200000,
"anthropic.claude-3-5-sonnet": 200000,
"anthropic.claude-3-5-haiku": 200000,
"anthropic.claude-3-opus": 200000,
"anthropic.claude-3-sonnet": 200000,
"anthropic.claude-3-haiku": 200000,
"anthropic.claude-3-7-sonnet": 200000,
"anthropic.claude-v2": 100000,
"amazon.titan-text-express": 8000,
"ai21.j2-ultra": 8192,
@@ -2060,9 +2089,12 @@ class BedrockCompletion(BaseLLM):
"deepseek.r1": 32768,
}
# Strip region prefix (us., eu., apac., global.) for matching
base_model = self._strip_region_prefix(self.model)
# Find the best match for the model name
for model_prefix, size in context_windows.items():
if self.model.startswith(model_prefix):
if base_model.startswith(model_prefix):
return int(size * CONTEXT_WINDOW_USAGE_RATIO)
# Default context window size
@@ -2071,22 +2103,26 @@ class BedrockCompletion(BaseLLM):
def supports_multimodal(self) -> bool:
"""Check if the model supports multimodal inputs.
Claude 3+ and Nova Lite/Pro/Premier on Bedrock support vision.
Claude 3+, Claude 4.x, and Nova Lite/Pro/Premier on Bedrock support vision.
Returns:
True if the model supports images.
"""
model_lower = self.model.lower()
vision_models = (
# Strip region prefix so we only need base-model prefixes
base_model = self._strip_region_prefix(self.model).lower()
vision_prefixes = (
# Claude 4.x models
"anthropic.claude-sonnet-4",
"anthropic.claude-opus-4",
"anthropic.claude-haiku-4",
# Claude 3.x models
"anthropic.claude-3",
# Amazon Nova models
"amazon.nova-lite",
"amazon.nova-pro",
"amazon.nova-premier",
"us.amazon.nova-lite",
"us.amazon.nova-pro",
"us.amazon.nova-premier",
)
return any(model_lower.startswith(m) for m in vision_models)
return any(base_model.startswith(m) for m in vision_prefixes)
def _is_nova_model(self) -> bool:
"""Check if the model is an Amazon Nova model.
@@ -2096,8 +2132,8 @@ class BedrockCompletion(BaseLLM):
Returns:
True if the model is a Nova model.
"""
model_lower = self.model.lower()
return "amazon.nova-" in model_lower
base_model = self._strip_region_prefix(self.model).lower()
return base_model.startswith("amazon.nova-")
def get_file_uploader(self) -> Any:
"""Get a Bedrock S3 file uploader using this LLM's AWS credentials.

View File

@@ -886,6 +886,225 @@ def test_bedrock_stop_sequences_sent_to_api():
assert call_kwargs["inferenceConfig"]["stopSequences"] == ["\nObservation:", "\nThought:"]
# =============================================================================
# Claude V4 Model Support Tests
# =============================================================================
def test_bedrock_claude_v4_model_detection():
    """Verify every Claude V4 ID routes to BedrockCompletion and registers as Claude."""
    from crewai.llms.providers.bedrock.completion import BedrockCompletion

    claude_v4_ids = (
        "bedrock/anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock/anthropic.claude-opus-4-20250514-v1:0",
        "bedrock/anthropic.claude-opus-4-1-20250805-v1:0",
        "bedrock/anthropic.claude-haiku-4-5-20251001-v1:0",
        "bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0",
        "bedrock/anthropic.claude-opus-4-5-20251101-v1:0",
    )
    for model_name in claude_v4_ids:
        instance = LLM(model=model_name)
        # Provider routing must pick the native Bedrock completion class.
        assert isinstance(instance, BedrockCompletion), f"Failed for model: {model_name}"
        assert instance.is_claude_model, f"is_claude_model should be True for {model_name}"
def test_bedrock_claude_v4_context_window_size():
    """Verify each Claude V4 model reports a usable window of ~170K tokens."""
    claude_v4_ids = (
        "bedrock/anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock/anthropic.claude-opus-4-20250514-v1:0",
        "bedrock/anthropic.claude-opus-4-1-20250805-v1:0",
        "bedrock/anthropic.claude-haiku-4-5-20251001-v1:0",
        "bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0",
        "bedrock/anthropic.claude-opus-4-5-20251101-v1:0",
    )
    for model_name in claude_v4_ids:
        # 200000 * 0.85 = 170000; assert well above the 3.x-era floor.
        context_size = LLM(model=model_name).get_context_window_size()
        assert context_size > 150000, (
            f"Context window for {model_name} should be ~170K (200K * 0.85), got {context_size}"
        )
def test_bedrock_claude_v4_multimodal_support():
    """Verify supports_multimodal() reports True for every Claude V4 variant."""
    claude_v4_ids = (
        "bedrock/anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock/anthropic.claude-opus-4-20250514-v1:0",
        "bedrock/anthropic.claude-opus-4-1-20250805-v1:0",
        "bedrock/anthropic.claude-haiku-4-5-20251001-v1:0",
        "bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0",
        "bedrock/anthropic.claude-opus-4-5-20251101-v1:0",
    )
    for model_name in claude_v4_ids:
        assert LLM(model=model_name).supports_multimodal(), (
            f"supports_multimodal() should return True for {model_name}"
        )
def test_bedrock_claude_v4_with_region_prefix():
    """Verify region-prefixed Claude V4 IDs keep routing, context window, and vision."""
    from crewai.llms.providers.bedrock.completion import BedrockCompletion

    prefixed_ids = (
        "bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock/eu.anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock/apac.anthropic.claude-sonnet-4-5-20250929-v1:0",
        "bedrock/global.anthropic.claude-opus-4-20250514-v1:0",
        "bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0",
    )
    for model_name in prefixed_ids:
        llm = LLM(model=model_name)
        assert isinstance(llm, BedrockCompletion), f"Failed for model: {model_name}"
        # The region qualifier must not break context-window detection...
        context_size = llm.get_context_window_size()
        assert context_size > 150000, (
            f"Context window for {model_name} should be ~170K, got {context_size}"
        )
        # ...nor multimodal detection.
        assert llm.supports_multimodal(), (
            f"supports_multimodal() should return True for {model_name}"
        )
def test_bedrock_strip_region_prefix():
    """Verify _strip_region_prefix drops region qualifiers and leaves bare IDs alone."""
    from crewai.llms.providers.bedrock.completion import BedrockCompletion

    # Maps raw model ID -> expected base ID after stripping.
    expectations = {
        "us.anthropic.claude-sonnet-4-20250514-v1:0": "anthropic.claude-sonnet-4-20250514-v1:0",
        "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": "anthropic.claude-3-5-sonnet-20241022-v2:0",
        "apac.anthropic.claude-sonnet-4-5-20250929-v1:0": "anthropic.claude-sonnet-4-5-20250929-v1:0",
        "global.anthropic.claude-opus-4-20250514-v1:0": "anthropic.claude-opus-4-20250514-v1:0",
        "anthropic.claude-sonnet-4-20250514-v1:0": "anthropic.claude-sonnet-4-20250514-v1:0",
        "amazon.nova-pro-v1:0": "amazon.nova-pro-v1:0",
    }
    for input_model, expected in expectations.items():
        result = BedrockCompletion._strip_region_prefix(input_model)
        assert result == expected, (
            f"_strip_region_prefix({input_model!r}) returned {result!r}, expected {expected!r}"
        )
def test_bedrock_claude_v4_inference_config():
    """Verify sampling parameters map to Converse inference-config keys for V4."""
    llm = LLM(
        model="bedrock/anthropic.claude-sonnet-4-20250514-v1:0",
        temperature=0.7,
        top_p=0.9,
        top_k=40,
        max_tokens=2000,
    )
    config = llm._get_inference_config()
    # snake_case constructor args become the camelCase keys Bedrock expects.
    expected = {"temperature": 0.7, "topP": 0.9, "topK": 40, "maxTokens": 2000}
    for key, value in expected.items():
        assert config[key] == value
def test_bedrock_claude_v4_supports_function_calling():
    """Verify tool/function calling is reported as supported on Claude V4."""
    for model_name in (
        "bedrock/anthropic.claude-sonnet-4-20250514-v1:0",
        "bedrock/anthropic.claude-opus-4-20250514-v1:0",
        "bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0",
    ):
        assert LLM(model=model_name).supports_function_calling(), (
            f"supports_function_calling() should return True for {model_name}"
        )
def test_bedrock_region_prefix_nova_model_detection():
    """Verify _is_nova_model sees through region prefixes and rejects Claude IDs."""
    # A region-prefixed Nova ID must still classify as Nova.
    assert LLM(model="bedrock/us.amazon.nova-pro-v1:0")._is_nova_model(), (
        "us.amazon.nova-pro should be detected as Nova model"
    )
    # A region-prefixed Claude ID must not.
    assert not LLM(model="bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0")._is_nova_model(), (
        "Claude model should not be detected as Nova"
    )
def test_bedrock_claude_v4_initialization_with_all_params():
    """Test that Claude V4 models can be initialized with all supported parameters.

    Covers temperature, token limit, nucleus/top-k sampling, stop sequences,
    streaming, and an explicit AWS region.
    """
    from crewai.llms.providers.bedrock.completion import BedrockCompletion

    llm = LLM(
        model="bedrock/anthropic.claude-sonnet-4-20250514-v1:0",
        temperature=0.5,
        max_tokens=4096,
        top_p=0.95,
        top_k=50,
        # Real newline escapes, matching the other stop-sequence tests in this
        # file; the previous "\\nHuman:" form passed a literal backslash + "n"
        # to the API instead of a newline.
        stop_sequences=["\nHuman:", "\nAssistant:"],
        stream=True,
        region_name="us-west-2",
    )
    assert isinstance(llm, BedrockCompletion)
    # The "bedrock/" provider prefix is stripped from the stored model ID.
    assert llm.model == "anthropic.claude-sonnet-4-20250514-v1:0"
    assert llm.temperature == 0.5
    assert llm.max_tokens == 4096
    assert llm.top_p == 0.95
    assert llm.top_k == 50
    assert llm.stream is True
    assert llm.region_name == "us-west-2"
    assert llm.is_claude_model is True
def test_bedrock_claude_v4_call_with_mock(bedrock_mocks):
    """Verify a Claude V4 call round-trips through the mocked Converse API."""
    _, mock_client = bedrock_mocks
    assistant_message = {
        "role": "assistant",
        "content": [{"text": "Hello from Claude Sonnet 4!"}],
    }
    mock_client.converse.return_value = {
        "output": {"message": assistant_message},
        "usage": {"inputTokens": 10, "outputTokens": 8, "totalTokens": 18},
    }
    llm = LLM(model="bedrock/anthropic.claude-sonnet-4-20250514-v1:0")
    # The text content of the mocked reply should be returned verbatim.
    assert llm.call("Hello") == "Hello from Claude Sonnet 4!"
    assert mock_client.converse.called
def test_bedrock_v3_models_still_work():
    """Regression guard: Claude V3 detection and capabilities survive the V4 changes."""
    from crewai.llms.providers.bedrock.completion import BedrockCompletion

    for model_name in (
        "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
        "bedrock/anthropic.claude-3-haiku-20240307-v1:0",
        "bedrock/anthropic.claude-3-opus-20240229-v1:0",
        "bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0",
    ):
        llm = LLM(model=model_name)
        assert isinstance(llm, BedrockCompletion), f"Failed for model: {model_name}"
        assert llm.is_claude_model is True
        assert llm.get_context_window_size() > 150000
        assert llm.supports_multimodal()
# =============================================================================
# Agent Kickoff Structured Output Tests
# =============================================================================