Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-11 00:58:30 +00:00)
Lorenze/bedrock llm (#3693)
* feat: add AWS Bedrock support and update dependencies
  - Introduced BedrockCompletion class for AWS Bedrock integration in LLM.
  - Added boto3 as a new dependency in both pyproject.toml and uv.lock.
  - Updated LLM class to support Bedrock provider.
  - Created new files for Bedrock provider implementation.
* using converse api
* converse
* linted
* refactor: update BedrockCompletion class to improve parameter handling
  - Changed max_tokens from a fixed integer to an optional integer.
  - Simplified model ID assignment by removing the inference profile mapping method.
  - Cleaned up comments and unnecessary code related to tool specifications and model-specific parameters.
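For orientation, a minimal usage sketch (not part of the diff), assuming the standard crewai LLM entry point and AWS credentials in the environment; the model string, routing prefix, and prompt are illustrative:

from crewai import LLM

# Assumption: a "bedrock/" model prefix routes to the new native provider
# added in this commit; credentials are read from the environment.
llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
print(llm.call("Summarize the Converse API in one sentence."))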
pyproject.toml
@@ -85,6 +85,9 @@ voyageai = [
 litellm = [
     "litellm>=1.74.9",
 ]
+boto3 = [
+    "boto3>=1.40.45",
+]


 [project.scripts]
@@ -367,6 +367,14 @@ class LLM(BaseLLM):
             except ImportError:
                 return None

+        elif provider == "bedrock":
+            try:
+                from crewai.llms.providers.bedrock.completion import BedrockCompletion
+
+                return BedrockCompletion
+            except ImportError:
+                return None
+
         return None

     def __init__(
lib/crewai/src/crewai/llms/providers/bedrock/completion.py (new file, 553 lines)
@@ -0,0 +1,553 @@
from collections.abc import Mapping, Sequence
import logging
import os
from typing import Any

from crewai.events.types.llm_events import LLMCallType
from crewai.llms.base_llm import BaseLLM
from crewai.utilities.agent_utils import is_context_length_exceeded
from crewai.utilities.exceptions.context_window_exceeding_exception import (
    LLMContextLengthExceededError,
)


try:
    from boto3.session import Session
    from botocore.config import Config
    from botocore.exceptions import BotoCoreError, ClientError
except ImportError:
    raise ImportError(
        "AWS Bedrock native provider requires boto3; install it with `uv add boto3`"
    ) from None


class BedrockCompletion(BaseLLM):
    """AWS Bedrock native completion implementation using the Converse API.

    This class provides direct integration with AWS Bedrock using the modern
    Converse API, which offers a unified interface across all Bedrock models.
    """
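
    # Roughly, the Converse call this class wraps looks like the following
    # (illustrative sketch against a boto3 "bedrock-runtime" client; the
    # model ID and values are made up):
    #
    #   client.converse(
    #       modelId="anthropic.claude-3-5-sonnet-20241022-v2:0",
    #       messages=[{"role": "user", "content": [{"text": "Hi"}]}],
    #       inferenceConfig={"maxTokens": 256, "temperature": 0.2},
    #   )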

    def __init__(
        self,
        model: str = "anthropic.claude-3-5-sonnet-20241022-v2:0",
        aws_access_key_id: str | None = None,
        aws_secret_access_key: str | None = None,
        aws_session_token: str | None = None,
        region_name: str = "us-east-1",
        temperature: float | None = None,
        max_tokens: int | None = None,
        top_p: float | None = None,
        top_k: int | None = None,
        stop_sequences: Sequence[str] | None = None,
        stream: bool = False,
        **kwargs,
    ):
        """Initialize AWS Bedrock completion client."""
        # Extract provider from kwargs to avoid duplicate argument
        kwargs.pop("provider", None)

        super().__init__(
            model=model,
            temperature=temperature,
            stop=stop_sequences or [],
            provider="bedrock",
            **kwargs,
        )

        # Initialize Bedrock client with proper configuration
        session = Session(
            aws_access_key_id=aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=aws_secret_access_key
            or os.getenv("AWS_SECRET_ACCESS_KEY"),
            aws_session_token=aws_session_token or os.getenv("AWS_SESSION_TOKEN"),
            region_name=region_name,
        )

        # Configure client with timeouts and retries following AWS best practices
        config = Config(
            connect_timeout=60,
            read_timeout=300,
            retries={
                "max_attempts": 3,
                "mode": "adaptive",
            },
            tcp_keepalive=True,
        )

        self.client = session.client("bedrock-runtime", config=config)
        self.region_name = region_name

        # Store completion parameters
        self.max_tokens = max_tokens
        self.top_p = top_p
        self.top_k = top_k
        self.stream = stream
        self.stop_sequences = stop_sequences or []

        # Model-specific settings
        self.is_claude_model = "claude" in model.lower()
        self.supports_tools = True  # Converse API supports tools for most models
        self.supports_streaming = True

        # Handle inference profiles for newer models
        self.model_id = model

    def call(
        self,
        messages: str | list[dict[str, str]],
        tools: Sequence[Mapping[str, Any]] | None = None,
        callbacks: list[Any] | None = None,
        available_functions: dict[str, Any] | None = None,
        from_task: Any | None = None,
        from_agent: Any | None = None,
    ) -> str | Any:
        """Call AWS Bedrock Converse API."""
        try:
            # Emit call started event
            self._emit_call_started_event(
                messages=messages,
                tools=tools,
                callbacks=callbacks,
                available_functions=available_functions,
                from_task=from_task,
                from_agent=from_agent,
            )

            # Format messages for Converse API
            formatted_messages, system_message = self._format_messages_for_converse(
                messages
            )

            # Prepare tool configuration
            tool_config = None
            if tools:
                tool_config = {"tools": self._format_tools_for_converse(tools)}

            # Prepare request body
            body: dict[str, Any] = {
                "inferenceConfig": self._get_inference_config(),
            }

            # Add system message if present
            if system_message:
                body["system"] = [{"text": system_message}]

            # Add tool config if present
            if tool_config:
                body["toolConfig"] = tool_config

            # top_k is not a Converse inferenceConfig field; for Claude models
            # it is forwarded through additionalModelRequestFields instead.
            if self.is_claude_model and self.top_k is not None:
                body["additionalModelRequestFields"] = {"top_k": int(self.top_k)}

            if self.stream:
                return self._handle_streaming_converse(
                    formatted_messages, body, available_functions, from_task, from_agent
                )

            return self._handle_converse(
                formatted_messages, body, available_functions, from_task, from_agent
            )

        except Exception as e:
            if is_context_length_exceeded(e):
                logging.error(f"Context window exceeded: {e}")
                raise LLMContextLengthExceededError(str(e)) from e

            error_msg = f"AWS Bedrock API call failed: {e!s}"
            logging.error(error_msg)
            self._emit_call_failed_event(
                error=error_msg, from_task=from_task, from_agent=from_agent
            )
            raise

    def _handle_converse(
        self,
        messages: list[dict[str, Any]],
        body: dict[str, Any],
        available_functions: Mapping[str, Any] | None = None,
        from_task: Any | None = None,
        from_agent: Any | None = None,
    ) -> str:
        """Handle non-streaming converse API call following AWS best practices."""
        try:
            # Validate messages format before API call
            if not messages:
                raise ValueError("Messages cannot be empty")

            # Ensure we have valid message structure
            for i, msg in enumerate(messages):
                if (
                    not isinstance(msg, dict)
                    or "role" not in msg
                    or "content" not in msg
                ):
                    raise ValueError(f"Invalid message format at index {i}")

            # Call Bedrock Converse API with proper error handling
            response = self.client.converse(
                modelId=self.model_id, messages=messages, **body
            )

            # Track token usage according to AWS response format
            if "usage" in response:
                self._track_token_usage_internal(response["usage"])

            # Extract content following AWS response structure
            output = response.get("output", {})
            message = output.get("message", {})
            content = message.get("content", [])

            if not content:
                logging.warning("No content in Bedrock response")
                return (
                    "I apologize, but I received an empty response. Please try again."
                )

            # Extract text content from response
            text_content = ""
            for content_block in content:
                # Converse content blocks are keyed by block type ({"text": ...}
                # or {"toolUse": ...}); there is no "type" field, so check the key.
                if "text" in content_block:
                    text_content += content_block["text"]
                elif "toolUse" in content_block and available_functions:
                    # Handle tool use according to AWS format
                    tool_use = content_block["toolUse"]
                    function_name = tool_use.get("name")
                    function_args = tool_use.get("input", {})

                    result = self._handle_tool_execution(
                        function_name=function_name,
                        function_args=function_args,
                        available_functions=available_functions,
                        from_task=from_task,
                        from_agent=from_agent,
                    )

                    if result is not None:
                        return result

            # Apply stop sequences if configured
            text_content = self._apply_stop_words(text_content)

            # Validate final response
            if not text_content or text_content.strip() == "":
                logging.warning("Extracted empty text content from Bedrock response")
                text_content = "I apologize, but I couldn't generate a proper response. Please try again."

            self._emit_call_completed_event(
                response=text_content,
                call_type=LLMCallType.LLM_CALL,
                from_task=from_task,
                from_agent=from_agent,
                messages=messages,
            )

            return text_content

        except ClientError as e:
            # Handle all AWS ClientError exceptions as per documentation
            error_code = e.response.get("Error", {}).get("Code", "Unknown")
            error_msg = e.response.get("Error", {}).get("Message", str(e))

            # Log the specific error for debugging
            logging.error(f"AWS Bedrock ClientError ({error_code}): {error_msg}")

            # Handle specific error codes as documented
            if error_code == "ValidationException":
                # Raised when message alternation rules are violated (e.g. Cohere)
                if "last turn" in error_msg and "user message" in error_msg:
                    raise ValueError(
                        f"Conversation format error: {error_msg}. Check message alternation."
                    ) from e
                raise ValueError(f"Request validation failed: {error_msg}") from e
            if error_code == "AccessDeniedException":
                raise PermissionError(
                    f"Access denied to model {self.model_id}: {error_msg}"
                ) from e
            if error_code == "ResourceNotFoundException":
                raise ValueError(f"Model {self.model_id} not found: {error_msg}") from e
            if error_code == "ThrottlingException":
                raise RuntimeError(
                    f"API throttled, please retry later: {error_msg}"
                ) from e
            if error_code == "ModelTimeoutException":
                raise TimeoutError(f"Model request timed out: {error_msg}") from e
            if error_code == "ServiceQuotaExceededException":
                raise RuntimeError(f"Service quota exceeded: {error_msg}") from e
            if error_code == "ModelNotReadyException":
                raise RuntimeError(
                    f"Model {self.model_id} not ready: {error_msg}"
                ) from e
            if error_code == "ModelErrorException":
                raise RuntimeError(f"Model error: {error_msg}") from e
            if error_code == "InternalServerException":
                raise RuntimeError(f"Internal server error: {error_msg}") from e
            if error_code == "ServiceUnavailableException":
                raise RuntimeError(f"Service unavailable: {error_msg}") from e

            raise RuntimeError(f"Bedrock API error ({error_code}): {error_msg}") from e

        except BotoCoreError as e:
            error_msg = f"Bedrock connection error: {e}"
            logging.error(error_msg)
            raise ConnectionError(error_msg) from e
        except Exception as e:
            # Catch any other unexpected errors
            error_msg = f"Unexpected error in Bedrock converse call: {e}"
            logging.error(error_msg)
            raise RuntimeError(error_msg) from e

    def _handle_streaming_converse(
        self,
        messages: list[dict[str, Any]],
        body: dict[str, Any],
        available_functions: dict[str, Any] | None = None,
        from_task: Any | None = None,
        from_agent: Any | None = None,
    ) -> str:
        """Handle streaming converse API call."""
        full_response = ""

        try:
            response = self.client.converse_stream(
                modelId=self.model_id, messages=messages, **body
            )

            stream = response.get("stream")
            if stream:
                for event in stream:
                    if "contentBlockDelta" in event:
                        delta = event["contentBlockDelta"]["delta"]
                        if "text" in delta:
                            text_chunk = delta["text"]
                            logging.debug(f"Streaming text chunk: {text_chunk[:50]}...")
                            full_response += text_chunk
                            self._emit_stream_chunk_event(
                                chunk=text_chunk,
                                from_task=from_task,
                                from_agent=from_agent,
                            )
                    elif "messageStop" in event:
                        # Handle end of message
                        break

        except ClientError as e:
            error_msg = self._handle_client_error(e)
            raise RuntimeError(error_msg) from e
        except BotoCoreError as e:
            error_msg = f"Bedrock streaming connection error: {e}"
            logging.error(error_msg)
            raise ConnectionError(error_msg) from e

        # Apply stop words to full response
        full_response = self._apply_stop_words(full_response)

        # Ensure we don't return empty content
        if not full_response or full_response.strip() == "":
            logging.warning("Bedrock streaming returned empty content, using fallback")
            full_response = (
                "I apologize, but I couldn't generate a response. Please try again."
            )

        # Emit completion event
        self._emit_call_completed_event(
            response=full_response,
            call_type=LLMCallType.LLM_CALL,
            from_task=from_task,
            from_agent=from_agent,
            messages=messages,
        )

        return full_response

    def _format_messages_for_converse(
        self, messages: str | list[dict[str, str]]
    ) -> tuple[list[dict[str, Any]], str | None]:
        """Format messages for Converse API following AWS documentation."""
        # Use base class formatting first
        formatted_messages = self._format_messages(messages)

        converse_messages = []
        system_message = None

        for message in formatted_messages:
            role = message.get("role")
            content = message.get("content", "")

            if role == "system":
                # Extract system message - Converse API handles it separately
                if system_message:
                    system_message += f"\n\n{content}"
                else:
                    system_message = content
            else:
                # Convert to Converse API format with proper content structure
                converse_messages.append({"role": role, "content": [{"text": content}]})

        # CRITICAL: Handle model-specific conversation requirements
        # Cohere and some other models require conversation to end with user message
        if converse_messages:
            last_message = converse_messages[-1]
            if last_message["role"] == "assistant":
                # For Cohere models, add a continuation user message
                if "cohere" in self.model.lower():
                    converse_messages.append(
                        {
                            "role": "user",
                            "content": [
                                {
                                    "text": "Please continue and provide your final answer."
                                }
                            ],
                        }
                    )
                # For other models that might have similar requirements
                elif any(
                    model_family in self.model.lower()
                    for model_family in ["command", "coral"]
                ):
                    converse_messages.append(
                        {
                            "role": "user",
                            "content": [{"text": "Continue your response."}],
                        }
                    )

        # Ensure first message is from user (required by Converse API)
        if not converse_messages:
            converse_messages.append(
                {
                    "role": "user",
                    "content": [{"text": "Hello, please help me with my request."}],
                }
            )
        elif converse_messages[0]["role"] != "user":
            converse_messages.insert(
                0,
                {
                    "role": "user",
                    "content": [{"text": "Hello, please help me with my request."}],
                },
            )

        return converse_messages, system_message
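
    # Illustrative transformation performed by _format_messages_for_converse
    # (values are made up):
    #   input:  [{"role": "system", "content": "Be terse."},
    #            {"role": "user", "content": "Hi"}]
    #   output: ([{"role": "user", "content": [{"text": "Hi"}]}], "Be terse.")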

    def _format_tools_for_converse(self, tools: list[dict]) -> list[dict]:
        """Convert CrewAI tools to Converse API format following AWS specification."""
        from crewai.llms.providers.utils.common import safe_tool_conversion

        converse_tools = []

        for tool in tools:
            try:
                name, description, parameters = safe_tool_conversion(tool, "Bedrock")

                converse_tool = {
                    "toolSpec": {
                        "name": name,
                        "description": description,
                    }
                }

                if parameters and isinstance(parameters, dict):
                    converse_tool["toolSpec"]["inputSchema"] = {"json": parameters}

                converse_tools.append(converse_tool)

            except Exception as e:  # noqa: PERF203
                logging.warning(
                    f"Failed to convert tool {tool.get('name', 'unknown')}: {e}"
                )
                continue

        return converse_tools
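
    # Illustrative toolSpec produced above for a hypothetical "get_weather"
    # tool (name and schema are made up):
    #   {"toolSpec": {"name": "get_weather",
    #                 "description": "Look up current weather for a city",
    #                 "inputSchema": {"json": {"type": "object",
    #                                          "properties": {"city": {"type": "string"}},
    #                                          "required": ["city"]}}}}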

    def _get_inference_config(self) -> dict[str, Any]:
        """Get inference configuration following AWS Converse API specification."""
        config: dict[str, Any] = {}

        if self.max_tokens:
            config["maxTokens"] = self.max_tokens

        if self.temperature is not None:
            config["temperature"] = float(self.temperature)
        if self.top_p is not None:
            config["topP"] = float(self.top_p)
        if self.stop_sequences:
            config["stopSequences"] = list(self.stop_sequences)

        # Note: top_k is supported by Claude models but is not part of the
        # Converse inferenceConfig schema (maxTokens, temperature, topP,
        # stopSequences); it is sent via additionalModelRequestFields in call().
        return config
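
    # Example: max_tokens=1024, temperature=0.2, stop_sequences=["\nObservation:"]
    # yields (illustrative):
    #   {"maxTokens": 1024, "temperature": 0.2,
    #    "stopSequences": ["\nObservation:"]}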

    def _handle_client_error(self, e: ClientError) -> str:
        """Handle AWS ClientError with specific error codes and return error message."""
        error_code = e.response.get("Error", {}).get("Code", "Unknown")
        error_msg = e.response.get("Error", {}).get("Message", str(e))

        error_mapping = {
            "AccessDeniedException": f"Access denied to model {self.model_id}: {error_msg}",
            "ResourceNotFoundException": f"Model {self.model_id} not found: {error_msg}",
            "ThrottlingException": f"API throttled, please retry later: {error_msg}",
            "ValidationException": f"Invalid request: {error_msg}",
            "ModelTimeoutException": f"Model request timed out: {error_msg}",
            "ServiceQuotaExceededException": f"Service quota exceeded: {error_msg}",
            "ModelNotReadyException": f"Model {self.model_id} not ready: {error_msg}",
            "ModelErrorException": f"Model error: {error_msg}",
        }

        full_error_msg = error_mapping.get(
            error_code, f"Bedrock API error: {error_msg}"
        )
        logging.error(f"Bedrock client error ({error_code}): {full_error_msg}")

        return full_error_msg

    def _track_token_usage_internal(self, usage: dict[str, Any]) -> None:
        """Track token usage from Bedrock response."""
        input_tokens = usage.get("inputTokens", 0)
        output_tokens = usage.get("outputTokens", 0)
        total_tokens = usage.get("totalTokens", input_tokens + output_tokens)

        self._token_usage["prompt_tokens"] += input_tokens
        self._token_usage["completion_tokens"] += output_tokens
        self._token_usage["total_tokens"] += total_tokens
        self._token_usage["successful_requests"] += 1

    def supports_function_calling(self) -> bool:
        """Check if the model supports function calling."""
        return self.supports_tools

    def supports_stop_words(self) -> bool:
        """Check if the model supports stop words."""
        return True

    def get_context_window_size(self) -> int:
        """Get the context window size for the model."""
        from crewai.llm import CONTEXT_WINDOW_USAGE_RATIO

        # Context window sizes for common Bedrock models
        context_windows = {
            "anthropic.claude-3-5-sonnet": 200000,
            "anthropic.claude-3-5-haiku": 200000,
            "anthropic.claude-3-opus": 200000,
            "anthropic.claude-3-sonnet": 200000,
            "anthropic.claude-3-haiku": 200000,
            "anthropic.claude-3-7-sonnet": 200000,
            "anthropic.claude-v2": 100000,
            "amazon.titan-text-express": 8000,
            "ai21.j2-ultra": 8192,
            "cohere.command-text": 4096,
            "meta.llama2-13b-chat": 4096,
            "meta.llama2-70b-chat": 4096,
            "meta.llama3-70b-instruct": 128000,
            "deepseek.r1": 32768,
        }

        # Find the best match for the model name
        for model_prefix, size in context_windows.items():
            if self.model.startswith(model_prefix):
                return int(size * CONTEXT_WINDOW_USAGE_RATIO)

        # Default context window size
        return int(8192 * CONTEXT_WINDOW_USAGE_RATIO)
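
A minimal sketch of driving the new class directly, assuming boto3 is installed and AWS credentials are resolvable from the environment; the model ID, region, and prompt are illustrative:

from crewai.llms.providers.bedrock.completion import BedrockCompletion

llm = BedrockCompletion(
    model="anthropic.claude-3-5-haiku-20241022-v1:0",  # illustrative model ID
    region_name="us-east-1",
    temperature=0.2,
    max_tokens=512,
)
# call() accepts either a plain string or a role/content message list.
print(llm.call([{"role": "user", "content": "Name one AWS region."}]))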
uv.lock (generated, 6 changes)
@@ -1020,6 +1020,9 @@ aisuite = [
 aws = [
     { name = "boto3" },
 ]
+boto3 = [
+    { name = "boto3" },
+]
 docling = [
     { name = "docling" },
 ]
@@ -1060,6 +1063,7 @@ requires-dist = [
     { name = "appdirs", specifier = ">=1.4.4" },
     { name = "blinker", specifier = ">=1.9.0" },
     { name = "boto3", marker = "extra == 'aws'", specifier = ">=1.40.38" },
+    { name = "boto3", marker = "extra == 'boto3'", specifier = ">=1.40.45" },
     { name = "chromadb", specifier = "~=1.1.0" },
     { name = "click", specifier = ">=8.1.7" },
     { name = "crewai-tools", marker = "extra == 'tools'", editable = "lib/crewai-tools" },
@@ -1095,7 +1099,7 @@ requires-dist = [
     { name = "uv", specifier = ">=0.4.25" },
     { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.3.5" },
 ]
-provides-extras = ["aisuite", "aws", "docling", "embeddings", "litellm", "mem0", "openpyxl", "pandas", "pdfplumber", "qdrant", "tools", "voyageai", "watson"]
+provides-extras = ["aisuite", "aws", "boto3", "docling", "embeddings", "litellm", "mem0", "openpyxl", "pandas", "pdfplumber", "qdrant", "tools", "voyageai", "watson"]

 [[package]]
 name = "crewai-devtools"