mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-01 23:32:39 +00:00
Squashed 'packages/tools/' content from commit 78317b9c
git-subtree-dir: packages/tools git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
This commit is contained in:
16
crewai_tools/aws/__init__.py
Normal file
16
crewai_tools/aws/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# Public surface of the crewai_tools.aws package: S3 read/write tools plus the
# Bedrock agent / knowledge-base tools and toolkit factory functions.
from .s3 import S3ReaderTool, S3WriterTool
from .bedrock import (
    BedrockKBRetrieverTool,
    BedrockInvokeAgentTool,
    create_browser_toolkit,
    create_code_interpreter_toolkit,
)

__all__ = [
    "S3ReaderTool",
    "S3WriterTool",
    "BedrockKBRetrieverTool",
    "BedrockInvokeAgentTool",
    "create_browser_toolkit",
    "create_code_interpreter_toolkit"
]
|
||||
11
crewai_tools/aws/bedrock/__init__.py
Normal file
11
crewai_tools/aws/bedrock/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
# Re-export the Bedrock-backed tools and toolkit factories so callers can
# import them directly from crewai_tools.aws.bedrock.
from .knowledge_base.retriever_tool import BedrockKBRetrieverTool
from .agents.invoke_agent_tool import BedrockInvokeAgentTool
from .browser import create_browser_toolkit
from .code_interpreter import create_code_interpreter_toolkit

__all__ = [
    "BedrockKBRetrieverTool",
    "BedrockInvokeAgentTool",
    "create_browser_toolkit",
    "create_code_interpreter_toolkit"
]
|
||||
181
crewai_tools/aws/bedrock/agents/README.md
Normal file
181
crewai_tools/aws/bedrock/agents/README.md
Normal file
@@ -0,0 +1,181 @@
|
||||
# BedrockInvokeAgentTool
|
||||
|
||||
The `BedrockInvokeAgentTool` enables CrewAI agents to invoke Amazon Bedrock Agents and leverage their capabilities within your workflows.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- AWS credentials configured (either through environment variables or AWS CLI)
|
||||
- `boto3` and `python-dotenv` packages
|
||||
- Access to Amazon Bedrock Agents
|
||||
|
||||
## Usage
|
||||
|
||||
Here's how to use the tool with a CrewAI agent:
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew
|
||||
from crewai_tools.aws.bedrock.agents.invoke_agent_tool import BedrockInvokeAgentTool
|
||||
|
||||
# Initialize the tool
|
||||
agent_tool = BedrockInvokeAgentTool(
|
||||
agent_id="your-agent-id",
|
||||
agent_alias_id="your-agent-alias-id"
|
||||
)
|
||||
|
||||
# Create a CrewAI agent that uses the tool
|
||||
aws_expert = Agent(
|
||||
role='AWS Service Expert',
|
||||
goal='Help users understand AWS services and quotas',
|
||||
backstory='I am an expert in AWS services and can provide detailed information about them.',
|
||||
tools=[agent_tool],
|
||||
verbose=True
|
||||
)
|
||||
|
||||
# Create a task for the agent
|
||||
quota_task = Task(
|
||||
description="Find out the current service quotas for EC2 in us-west-2 and explain any recent changes.",
|
||||
agent=aws_expert
|
||||
)
|
||||
|
||||
# Create a crew with the agent
|
||||
crew = Crew(
|
||||
agents=[aws_expert],
|
||||
tasks=[quota_task],
|
||||
verbose=2
|
||||
)
|
||||
|
||||
# Run the crew
|
||||
result = crew.kickoff()
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Tool Arguments
|
||||
|
||||
| Argument | Type | Required | Default | Description |
|
||||
|----------|------|----------|---------|-------------|
|
||||
| agent_id | str | Yes | None | The unique identifier of the Bedrock agent |
|
||||
| agent_alias_id | str | Yes | None | The unique identifier of the agent alias |
|
||||
| session_id | str | No | timestamp | The unique identifier of the session |
|
||||
| enable_trace | bool | No | False | Whether to enable trace for debugging |
|
||||
| end_session | bool | No | False | Whether to end the session after invocation |
|
||||
| description | str | No | None | Custom description for the tool |
|
||||
|
||||
## Environment Variables
|
||||
|
||||
```bash
|
||||
BEDROCK_AGENT_ID=your-agent-id # Alternative to passing agent_id
|
||||
BEDROCK_AGENT_ALIAS_ID=your-agent-alias-id # Alternative to passing agent_alias_id
|
||||
AWS_REGION=your-aws-region # Defaults to us-west-2
|
||||
AWS_ACCESS_KEY_ID=your-access-key # Required for AWS authentication
|
||||
AWS_SECRET_ACCESS_KEY=your-secret-key # Required for AWS authentication
|
||||
```
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Multi-Agent Workflow with Session Management
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew, Process
|
||||
from crewai_tools.aws.bedrock.agents.invoke_agent_tool import BedrockInvokeAgentTool
|
||||
|
||||
# Initialize tools with session management
|
||||
initial_tool = BedrockInvokeAgentTool(
|
||||
agent_id="your-agent-id",
|
||||
agent_alias_id="your-agent-alias-id",
|
||||
session_id="custom-session-id"
|
||||
)
|
||||
|
||||
followup_tool = BedrockInvokeAgentTool(
|
||||
agent_id="your-agent-id",
|
||||
agent_alias_id="your-agent-alias-id",
|
||||
session_id="custom-session-id"
|
||||
)
|
||||
|
||||
final_tool = BedrockInvokeAgentTool(
|
||||
agent_id="your-agent-id",
|
||||
agent_alias_id="your-agent-alias-id",
|
||||
session_id="custom-session-id",
|
||||
end_session=True
|
||||
)
|
||||
|
||||
# Create agents for different stages
|
||||
researcher = Agent(
|
||||
role='AWS Service Researcher',
|
||||
goal='Gather information about AWS services',
|
||||
backstory='I am specialized in finding detailed AWS service information.',
|
||||
tools=[initial_tool]
|
||||
)
|
||||
|
||||
analyst = Agent(
|
||||
role='Service Compatibility Analyst',
|
||||
goal='Analyze service compatibility and requirements',
|
||||
backstory='I analyze AWS services for compatibility and integration possibilities.',
|
||||
tools=[followup_tool]
|
||||
)
|
||||
|
||||
summarizer = Agent(
|
||||
role='Technical Documentation Writer',
|
||||
goal='Create clear technical summaries',
|
||||
backstory='I specialize in creating clear, concise technical documentation.',
|
||||
tools=[final_tool]
|
||||
)
|
||||
|
||||
# Create tasks
|
||||
research_task = Task(
|
||||
description="Find all available AWS services in us-west-2 region.",
|
||||
agent=researcher
|
||||
)
|
||||
|
||||
analysis_task = Task(
|
||||
description="Analyze which services support IPv6 and their implementation requirements.",
|
||||
agent=analyst
|
||||
)
|
||||
|
||||
summary_task = Task(
|
||||
description="Create a summary of IPv6-compatible services and their key features.",
|
||||
agent=summarizer
|
||||
)
|
||||
|
||||
# Create a crew with the agents and tasks
|
||||
crew = Crew(
|
||||
agents=[researcher, analyst, summarizer],
|
||||
tasks=[research_task, analysis_task, summary_task],
|
||||
process=Process.sequential,
|
||||
verbose=2
|
||||
)
|
||||
|
||||
# Run the crew
|
||||
result = crew.kickoff()
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Hybrid Multi-Agent Collaborations
|
||||
- Create workflows where CrewAI agents collaborate with managed Bedrock agents running as services in AWS
|
||||
- Enable scenarios where sensitive data processing happens within your AWS environment while other agents operate externally
|
||||
- Bridge on-premises CrewAI agents with cloud-based Bedrock agents for distributed intelligence workflows
|
||||
|
||||
### Data Sovereignty and Compliance
|
||||
- Keep data-sensitive agentic workflows within your AWS environment while allowing external CrewAI agents to orchestrate tasks
|
||||
- Maintain compliance with data residency requirements by processing sensitive information only within your AWS account
|
||||
- Enable secure multi-agent collaborations where some agents cannot access your organization's private data
|
||||
|
||||
### Seamless AWS Service Integration
|
||||
- Access any AWS service through Amazon Bedrock Actions without writing complex integration code
|
||||
- Enable CrewAI agents to interact with AWS services through natural language requests
|
||||
- Leverage pre-built Bedrock agent capabilities to interact with AWS services like Bedrock Knowledge Bases, Lambda, and more
|
||||
|
||||
### Scalable Hybrid Agent Architectures
|
||||
- Offload computationally intensive tasks to managed Bedrock agents while lightweight tasks run in CrewAI
|
||||
- Scale agent processing by distributing workloads between local CrewAI agents and cloud-based Bedrock agents
|
||||
|
||||
### Cross-Organizational Agent Collaboration
|
||||
- Enable secure collaboration between your organization's CrewAI agents and partner organizations' Bedrock agents
|
||||
- Create workflows where external expertise from Bedrock agents can be incorporated without exposing sensitive data
|
||||
- Build agent ecosystems that span organizational boundaries while maintaining security and data control
|
||||
3
crewai_tools/aws/bedrock/agents/__init__.py
Normal file
3
crewai_tools/aws/bedrock/agents/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
# Expose the agent-invocation tool as the package's only public name.
from .invoke_agent_tool import BedrockInvokeAgentTool

__all__ = ["BedrockInvokeAgentTool"]
|
||||
176
crewai_tools/aws/bedrock/agents/invoke_agent_tool.py
Normal file
176
crewai_tools/aws/bedrock/agents/invoke_agent_tool.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from typing import Type, Optional, Dict, Any, List
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..exceptions import BedrockAgentError, BedrockValidationError
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class BedrockInvokeAgentToolInput(BaseModel):
    """Input schema for BedrockInvokeAgentTool."""
    # Free-form natural-language request; _run embeds it verbatim in the prompt
    # sent to the Bedrock agent.
    query: str = Field(..., description="The query to send to the agent")
|
||||
|
||||
|
||||
class BedrockInvokeAgentTool(BaseTool):
    """Invoke an Amazon Bedrock Agent and return its completion text.

    Configuration can come from constructor arguments or from the
    BEDROCK_AGENT_ID / BEDROCK_AGENT_ALIAS_ID environment variables; the AWS
    region is resolved from AWS_REGION, then AWS_DEFAULT_REGION, then falls
    back to us-west-2.
    """
    name: str = "Bedrock Agent Invoke Tool"
    description: str = "An agent responsible for policy analysis."
    args_schema: Type[BaseModel] = BedrockInvokeAgentToolInput
    # These fields are genuinely nullable until __init__ resolves them from
    # the environment, so annotate them Optional instead of `str = None`.
    agent_id: Optional[str] = None
    agent_alias_id: Optional[str] = None
    session_id: Optional[str] = None
    enable_trace: bool = False
    end_session: bool = False
    package_dependencies: List[str] = ["boto3"]

    def __init__(
        self,
        agent_id: Optional[str] = None,
        agent_alias_id: Optional[str] = None,
        session_id: Optional[str] = None,
        enable_trace: bool = False,
        end_session: bool = False,
        description: Optional[str] = None,
        **kwargs
    ):
        """Initialize the BedrockInvokeAgentTool with agent configuration.

        Args:
            agent_id (Optional[str]): The unique identifier of the Bedrock agent
            agent_alias_id (Optional[str]): The unique identifier of the agent alias
            session_id (Optional[str]): The unique identifier of the session
            enable_trace (bool): Whether to enable trace for the agent invocation
            end_session (bool): Whether to end the session with the agent
            description (Optional[str]): Custom description for the tool

        Raises:
            BedrockValidationError: If the resolved parameters are missing or invalid.
        """
        super().__init__(**kwargs)

        # Fall back to environment variables when values are not passed explicitly.
        self.agent_id = agent_id or os.getenv('BEDROCK_AGENT_ID')
        self.agent_alias_id = agent_alias_id or os.getenv('BEDROCK_AGENT_ALIAS_ID')
        # A timestamp keeps all invocations from this instance in one session.
        self.session_id = session_id or str(int(time.time()))
        self.enable_trace = enable_trace
        self.end_session = end_session

        # Update the description if provided
        if description:
            self.description = description

        # Validate parameters
        self._validate_parameters()

    def _validate_parameters(self):
        """Validate the parameters according to AWS API requirements.

        Raises:
            BedrockValidationError: If any required parameter is empty or not a string.
        """
        try:
            # Validate agent_id
            if not self.agent_id:
                raise BedrockValidationError("agent_id cannot be empty")
            if not isinstance(self.agent_id, str):
                raise BedrockValidationError("agent_id must be a string")

            # Validate agent_alias_id
            if not self.agent_alias_id:
                raise BedrockValidationError("agent_alias_id cannot be empty")
            if not isinstance(self.agent_alias_id, str):
                raise BedrockValidationError("agent_alias_id must be a string")

            # Validate session_id if provided
            if self.session_id and not isinstance(self.session_id, str):
                raise BedrockValidationError("session_id must be a string")

        except BedrockValidationError as e:
            # Preserve the original message but chain the cause for debugging.
            raise BedrockValidationError(f"Parameter validation failed: {str(e)}") from e

    def _run(self, query: str) -> str:
        """Send the query to the Bedrock agent and return the aggregated completion.

        Args:
            query: The natural-language request forwarded to the agent.

        Returns:
            The agent's completion text.

        Raises:
            ImportError: If boto3 is not installed.
            BedrockAgentError: On AWS client errors or an unparseable response.
        """
        try:
            import boto3
            from botocore.exceptions import ClientError
        except ImportError:
            raise ImportError("`boto3` package not found, please run `uv add boto3`")

        try:
            # Initialize the Bedrock Agent Runtime client
            bedrock_agent = boto3.client(
                "bedrock-agent-runtime",
                region_name=os.getenv('AWS_REGION', os.getenv('AWS_DEFAULT_REGION', 'us-west-2'))
            )

            # Format the prompt with the current time so the agent can reason about "now".
            current_utc = datetime.now(timezone.utc)
            prompt = f"""
            The current time is: {current_utc}

            Below is the user's query or task. Complete it and answer it concisely and to the point:
            {query}
            """

            # Invoke the agent
            response = bedrock_agent.invoke_agent(
                agentId=self.agent_id,
                agentAliasId=self.agent_alias_id,
                sessionId=self.session_id,
                inputText=prompt,
                enableTrace=self.enable_trace,
                endSession=self.end_session
            )

            # Aggregate the (possibly streaming) completion text.
            completion = ""

            # Check if response contains a completion field
            if 'completion' in response:
                # Streaming response format: an event stream of byte chunks.
                for event in response.get('completion', []):
                    if 'chunk' in event and 'bytes' in event['chunk']:
                        chunk_bytes = event['chunk']['bytes']
                        if isinstance(chunk_bytes, (bytes, bytearray)):
                            completion += chunk_bytes.decode('utf-8')
                        else:
                            completion += str(chunk_bytes)

            # If no completion found in streaming format, try direct format
            if not completion and 'chunk' in response and 'bytes' in response['chunk']:
                chunk_bytes = response['chunk']['bytes']
                if isinstance(chunk_bytes, (bytes, bytearray)):
                    completion = chunk_bytes.decode('utf-8')
                else:
                    completion = str(chunk_bytes)

            # If still no completion, surface debug info in the raised error.
            if not completion:
                debug_info = {
                    "error": "Could not extract completion from response",
                    "response_keys": list(response.keys())
                }

                # Add more debug info
                if 'chunk' in response:
                    debug_info["chunk_keys"] = list(response['chunk'].keys())

                raise BedrockAgentError(f"Failed to extract completion: {json.dumps(debug_info, indent=2)}")

            return completion

        except ClientError as e:
            error_code = "Unknown"
            error_message = str(e)

            # Prefer the structured error code/message when AWS provides one.
            if hasattr(e, 'response') and 'Error' in e.response:
                error_code = e.response['Error'].get('Code', 'Unknown')
                error_message = e.response['Error'].get('Message', str(e))

            raise BedrockAgentError(f"Error ({error_code}): {error_message}") from e
        except BedrockAgentError:
            # Re-raise BedrockAgentError exceptions untouched.
            raise
        except Exception as e:
            raise BedrockAgentError(f"Unexpected error: {str(e)}") from e
|
||||
158
crewai_tools/aws/bedrock/browser/README.md
Normal file
158
crewai_tools/aws/bedrock/browser/README.md
Normal file
@@ -0,0 +1,158 @@
|
||||
# AWS Bedrock Browser Tools
|
||||
|
||||
This toolkit provides a set of tools for interacting with web browsers through AWS Bedrock Browser. It enables your CrewAI agents to navigate websites, extract content, click elements, and more.
|
||||
|
||||
## Features
|
||||
|
||||
- Navigate to URLs and browse the web
|
||||
- Extract text and hyperlinks from pages
|
||||
- Click on elements using CSS selectors
|
||||
- Navigate back through browser history
|
||||
- Get information about the current webpage
|
||||
- Multiple browser sessions with thread-based isolation
|
||||
|
||||
## Installation
|
||||
|
||||
Ensure you have the necessary dependencies:
|
||||
|
||||
```bash
|
||||
uv add crewai-tools bedrock-agentcore beautifulsoup4 playwright nest-asyncio
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew, LLM
|
||||
from crewai_tools.aws.bedrock.browser import create_browser_toolkit
|
||||
|
||||
# Create the browser toolkit
|
||||
toolkit, browser_tools = create_browser_toolkit(region="us-west-2")
|
||||
|
||||
# Create the Bedrock LLM
|
||||
llm = LLM(
|
||||
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
region_name="us-west-2",
|
||||
)
|
||||
|
||||
# Create a CrewAI agent that uses the browser tools
|
||||
research_agent = Agent(
|
||||
role="Web Researcher",
|
||||
goal="Research and summarize web content",
|
||||
backstory="You're an expert at finding information online.",
|
||||
tools=browser_tools,
|
||||
llm=llm
|
||||
)
|
||||
|
||||
# Create a task for the agent
|
||||
research_task = Task(
|
||||
description="Navigate to https://example.com and extract all text content. Summarize the main points.",
|
||||
expected_output="A list of bullet points containing the most important information on https://example.com. Plus, a description of the tool calls used, and actions performed to get to the page.",
|
||||
agent=research_agent
|
||||
)
|
||||
|
||||
# Create and run the crew
|
||||
crew = Crew(
|
||||
agents=[research_agent],
|
||||
tasks=[research_task]
|
||||
)
|
||||
result = crew.kickoff()
|
||||
|
||||
print(f"\n***Final result:***\n\n{result}")
|
||||
|
||||
# Clean up browser resources when done
|
||||
toolkit.sync_cleanup()
|
||||
```
|
||||
|
||||
### Available Tools
|
||||
|
||||
The toolkit provides the following tools:
|
||||
|
||||
1. `navigate_browser` - Navigate to a URL
|
||||
2. `click_element` - Click on an element using CSS selectors
|
||||
3. `extract_text` - Extract all text from the current webpage
|
||||
4. `extract_hyperlinks` - Extract all hyperlinks from the current webpage
|
||||
5. `get_elements` - Get elements matching a CSS selector
|
||||
6. `navigate_back` - Navigate to the previous page
|
||||
7. `current_webpage` - Get information about the current webpage
|
||||
|
||||
### Advanced Usage (with async)
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from crewai import Agent, Task, Crew, LLM
|
||||
from crewai_tools.aws.bedrock.browser import create_browser_toolkit
|
||||
|
||||
async def main():
|
||||
|
||||
# Create the browser toolkit with specific AWS region
|
||||
toolkit, browser_tools = create_browser_toolkit(region="us-west-2")
|
||||
tools_by_name = toolkit.get_tools_by_name()
|
||||
|
||||
# Create the Bedrock LLM
|
||||
llm = LLM(
|
||||
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
region_name="us-west-2",
|
||||
)
|
||||
|
||||
# Create agents with specific tools
|
||||
navigator_agent = Agent(
|
||||
role="Navigator",
|
||||
goal="Find specific information across websites",
|
||||
backstory="You navigate through websites to locate information.",
|
||||
tools=[
|
||||
tools_by_name["navigate_browser"],
|
||||
tools_by_name["click_element"],
|
||||
tools_by_name["navigate_back"]
|
||||
],
|
||||
llm=llm
|
||||
)
|
||||
|
||||
content_agent = Agent(
|
||||
role="Content Extractor",
|
||||
goal="Extract and analyze webpage content",
|
||||
backstory="You extract and analyze content from webpages.",
|
||||
tools=[
|
||||
tools_by_name["extract_text"],
|
||||
tools_by_name["extract_hyperlinks"],
|
||||
tools_by_name["get_elements"]
|
||||
],
|
||||
llm=llm
|
||||
)
|
||||
|
||||
# Create tasks for the agents
|
||||
navigation_task = Task(
|
||||
description="Navigate to https://example.com, then click on the 'More information...' link.",
|
||||
expected_output="The status of the tool calls for this task.",
|
||||
agent=navigator_agent,
|
||||
)
|
||||
|
||||
extraction_task = Task(
|
||||
description="Extract all text from the current page and summarize it.",
|
||||
expected_output="The summary of the page, and a description of the tool calls used, and actions performed to get to the page.",
|
||||
agent=content_agent,
|
||||
)
|
||||
|
||||
# Create and run the crew
|
||||
crew = Crew(
|
||||
agents=[navigator_agent, content_agent],
|
||||
tasks=[navigation_task, extraction_task]
|
||||
)
|
||||
|
||||
result = await crew.kickoff_async()
|
||||
|
||||
# Clean up browser resources when done
|
||||
toolkit.sync_cleanup()
|
||||
|
||||
return result
|
||||
|
||||
if __name__ == "__main__":
|
||||
result = asyncio.run(main())
|
||||
print(f"\n***Final result:***\n\n{result}")
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- AWS account with access to Bedrock AgentCore API
|
||||
- Properly configured AWS credentials
|
||||
3
crewai_tools/aws/bedrock/browser/__init__.py
Normal file
3
crewai_tools/aws/bedrock/browser/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
# Expose the browser toolkit class and its factory function as the package API.
from .browser_toolkit import BrowserToolkit, create_browser_toolkit

__all__ = ["BrowserToolkit", "create_browser_toolkit"]
|
||||
260
crewai_tools/aws/bedrock/browser/browser_session_manager.py
Normal file
260
crewai_tools/aws/bedrock/browser/browser_session_manager.py
Normal file
@@ -0,0 +1,260 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Dict, Tuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from playwright.async_api import Browser as AsyncBrowser
|
||||
from playwright.sync_api import Browser as SyncBrowser
|
||||
from bedrock_agentcore.tools.browser_client import BrowserClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BrowserSessionManager:
    """
    Manages browser sessions for different threads.

    This class maintains separate browser sessions for different threads,
    enabling concurrent usage of browsers in multi-threaded environments.
    Browsers are created lazily only when needed by tools.
    """

    def __init__(self, region: str = "us-west-2"):
        """
        Initialize the browser session manager.

        Args:
            region: AWS region for browser client
        """
        self.region = region
        # thread_id -> (BrowserClient, browser). One isolated session per thread,
        # tracked separately for the async and sync Playwright APIs.
        self._async_sessions: Dict[str, Tuple[BrowserClient, AsyncBrowser]] = {}
        self._sync_sessions: Dict[str, Tuple[BrowserClient, SyncBrowser]] = {}

    async def get_async_browser(self, thread_id: str) -> AsyncBrowser:
        """
        Get or create an async browser for the specified thread.

        Args:
            thread_id: Unique identifier for the thread requesting the browser

        Returns:
            An async browser instance specific to the thread
        """
        # Reuse the cached browser if this thread already has a session.
        if thread_id in self._async_sessions:
            return self._async_sessions[thread_id][1]

        return await self._create_async_browser_session(thread_id)

    def get_sync_browser(self, thread_id: str) -> SyncBrowser:
        """
        Get or create a sync browser for the specified thread.

        Args:
            thread_id: Unique identifier for the thread requesting the browser

        Returns:
            A sync browser instance specific to the thread
        """
        # Reuse the cached browser if this thread already has a session.
        if thread_id in self._sync_sessions:
            return self._sync_sessions[thread_id][1]

        return self._create_sync_browser_session(thread_id)

    async def _create_async_browser_session(self, thread_id: str) -> AsyncBrowser:
        """
        Create a new async browser session for the specified thread.

        Args:
            thread_id: Unique identifier for the thread

        Returns:
            The newly created async browser instance

        Raises:
            Exception: If browser session creation fails
        """
        # Imported lazily so the package is importable without bedrock_agentcore.
        from bedrock_agentcore.tools.browser_client import BrowserClient
        browser_client = BrowserClient(region=self.region)

        try:
            # Start browser session
            browser_client.start()

            # Get WebSocket connection info
            ws_url, headers = browser_client.generate_ws_headers()

            logger.info(
                f"Connecting to async WebSocket endpoint for thread {thread_id}: {ws_url}"
            )

            from playwright.async_api import async_playwright

            # Connect to browser using Playwright
            # NOTE(review): the `playwright` driver object is never stored, so
            # playwright.stop() is never called on cleanup — confirm the driver
            # process is not leaked when the session is closed.
            playwright = await async_playwright().start()
            browser = await playwright.chromium.connect_over_cdp(
                endpoint_url=ws_url, headers=headers, timeout=30000
            )
            logger.info(
                f"Successfully connected to async browser for thread {thread_id}"
            )

            # Store session resources
            self._async_sessions[thread_id] = (browser_client, browser)

            return browser

        except Exception as e:
            logger.error(
                f"Failed to create async browser session for thread {thread_id}: {e}"
            )

            # Clean up resources if session creation fails
            if browser_client:
                try:
                    browser_client.stop()
                except Exception as cleanup_error:
                    logger.warning(f"Error cleaning up browser client: {cleanup_error}")

            raise

    def _create_sync_browser_session(self, thread_id: str) -> SyncBrowser:
        """
        Create a new sync browser session for the specified thread.

        Args:
            thread_id: Unique identifier for the thread

        Returns:
            The newly created sync browser instance

        Raises:
            Exception: If browser session creation fails
        """
        # Imported lazily so the package is importable without bedrock_agentcore.
        from bedrock_agentcore.tools.browser_client import BrowserClient
        browser_client = BrowserClient(region=self.region)

        try:
            # Start browser session
            browser_client.start()

            # Get WebSocket connection info
            ws_url, headers = browser_client.generate_ws_headers()

            logger.info(
                f"Connecting to sync WebSocket endpoint for thread {thread_id}: {ws_url}"
            )

            from playwright.sync_api import sync_playwright

            # Connect to browser using Playwright
            # NOTE(review): as in the async path, the `playwright` driver object
            # is never stored or stopped — confirm it is not leaked.
            playwright = sync_playwright().start()
            browser = playwright.chromium.connect_over_cdp(
                endpoint_url=ws_url, headers=headers, timeout=30000
            )
            logger.info(
                f"Successfully connected to sync browser for thread {thread_id}"
            )

            # Store session resources
            self._sync_sessions[thread_id] = (browser_client, browser)

            return browser

        except Exception as e:
            logger.error(
                f"Failed to create sync browser session for thread {thread_id}: {e}"
            )

            # Clean up resources if session creation fails
            if browser_client:
                try:
                    browser_client.stop()
                except Exception as cleanup_error:
                    logger.warning(f"Error cleaning up browser client: {cleanup_error}")

            raise

    async def close_async_browser(self, thread_id: str) -> None:
        """
        Close the async browser session for the specified thread.

        Args:
            thread_id: Unique identifier for the thread
        """
        if thread_id not in self._async_sessions:
            logger.warning(f"No async browser session found for thread {thread_id}")
            return

        browser_client, browser = self._async_sessions[thread_id]

        # Close browser first, then stop the client; each step is best-effort
        # so one failure does not prevent the other resource from being freed.
        if browser:
            try:
                await browser.close()
            except Exception as e:
                logger.warning(
                    f"Error closing async browser for thread {thread_id}: {e}"
                )

        # Stop browser client
        if browser_client:
            try:
                browser_client.stop()
            except Exception as e:
                logger.warning(
                    f"Error stopping browser client for thread {thread_id}: {e}"
                )

        # Remove session from dictionary
        del self._async_sessions[thread_id]
        logger.info(f"Async browser session cleaned up for thread {thread_id}")

    def close_sync_browser(self, thread_id: str) -> None:
        """
        Close the sync browser session for the specified thread.

        Args:
            thread_id: Unique identifier for the thread
        """
        if thread_id not in self._sync_sessions:
            logger.warning(f"No sync browser session found for thread {thread_id}")
            return

        browser_client, browser = self._sync_sessions[thread_id]

        # Close browser first, then stop the client; each step is best-effort
        # so one failure does not prevent the other resource from being freed.
        if browser:
            try:
                browser.close()
            except Exception as e:
                logger.warning(
                    f"Error closing sync browser for thread {thread_id}: {e}"
                )

        # Stop browser client
        if browser_client:
            try:
                browser_client.stop()
            except Exception as e:
                logger.warning(
                    f"Error stopping browser client for thread {thread_id}: {e}"
                )

        # Remove session from dictionary
        del self._sync_sessions[thread_id]
        logger.info(f"Sync browser session cleaned up for thread {thread_id}")

    async def close_all_browsers(self) -> None:
        """Close all browser sessions."""
        # Iterate over copies of the key lists because the close_* methods
        # delete entries from the dictionaries while we loop.
        async_thread_ids = list(self._async_sessions.keys())
        for thread_id in async_thread_ids:
            await self.close_async_browser(thread_id)

        # Close all sync browsers
        sync_thread_ids = list(self._sync_sessions.keys())
        for thread_id in sync_thread_ids:
            self.close_sync_browser(thread_id)

        logger.info("All browser sessions closed")
|
||||
587
crewai_tools/aws/bedrock/browser/browser_toolkit.py
Normal file
587
crewai_tools/aws/bedrock/browser/browser_toolkit.py
Normal file
@@ -0,0 +1,587 @@
|
||||
"""Toolkit for navigating web with AWS browser."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import asyncio
|
||||
from typing import Dict, List, Tuple, Any, Type
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .browser_session_manager import BrowserSessionManager
|
||||
from .utils import aget_current_page, get_current_page
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Input schemas
|
||||
class NavigateToolInput(BaseModel):
    """Input for NavigateTool."""
    url: str = Field(description="URL to navigate to")
    # Each thread_id maps to its own isolated browser session in the session manager.
    thread_id: str = Field(default="default", description="Thread ID for the browser session")
|
||||
|
||||
|
||||
class ClickToolInput(BaseModel):
    """Input for ClickTool."""
    selector: str = Field(description="CSS selector for the element to click on")
    # Each thread_id maps to its own isolated browser session in the session manager.
    thread_id: str = Field(default="default", description="Thread ID for the browser session")
|
||||
|
||||
|
||||
class GetElementsToolInput(BaseModel):
    """Input for GetElementsTool."""
    selector: str = Field(description="CSS selector for elements to get")
    # Each thread_id maps to its own isolated browser session in the session manager.
    thread_id: str = Field(default="default", description="Thread ID for the browser session")
|
||||
|
||||
|
||||
class ExtractTextToolInput(BaseModel):
    """Input for ExtractTextTool."""
    # No other inputs: the tool operates on the current page of this thread's session.
    thread_id: str = Field(default="default", description="Thread ID for the browser session")
|
||||
|
||||
|
||||
class ExtractHyperlinksToolInput(BaseModel):
    """Input for ExtractHyperlinksTool."""
    # No other inputs: the tool operates on the current page of this thread's session.
    thread_id: str = Field(default="default", description="Thread ID for the browser session")
|
||||
|
||||
|
||||
class NavigateBackToolInput(BaseModel):
    """Input for NavigateBackTool."""
    # No other inputs: the tool operates on the current page of this thread's session.
    thread_id: str = Field(default="default", description="Thread ID for the browser session")
|
||||
|
||||
|
||||
class CurrentWebPageToolInput(BaseModel):
    """Input for CurrentWebPageTool: only the per-thread session identifier."""
    thread_id: str = Field(default="default", description="Thread ID for the browser session")
|
||||
|
||||
|
||||
# Base tool class
|
||||
class BrowserBaseTool(BaseTool):
    """Base class for browser tools.

    Holds the shared BrowserSessionManager and exposes per-thread page
    accessors for both sync and async Playwright browsers. When the tool
    is constructed inside a running asyncio loop, the synchronous `_run`
    entry point is patched to drive `_arun` via nest_asyncio, so callers
    that invoke the sync API from async code do not deadlock.
    """

    def __init__(self, session_manager: BrowserSessionManager):
        """Initialize with a session manager.

        Args:
            session_manager: Shared manager that owns one browser session
                per thread ID.
        """
        super().__init__()
        self._session_manager = session_manager

        if self._is_in_asyncio_loop() and hasattr(self, '_arun'):
            self._original_run = self._run

            # Override _run to use _arun when in an asyncio loop.
            def patched_run(*args, **kwargs):
                try:
                    import nest_asyncio
                    # get_running_loop() is the non-deprecated way to grab
                    # the loop we are already inside of.
                    loop = asyncio.get_running_loop()
                    nest_asyncio.apply(loop)
                    return loop.run_until_complete(
                        self._arun(*args, **kwargs)
                    )
                except Exception as e:
                    return f"Error in patched _run: {str(e)}"
            self._run = patched_run

    async def get_async_page(self, thread_id: str) -> Any:
        """Get or create a page for the specified thread (async browser)."""
        browser = await self._session_manager.get_async_browser(thread_id)
        page = await aget_current_page(browser)
        return page

    def get_sync_page(self, thread_id: str) -> Any:
        """Get or create a page for the specified thread (sync browser)."""
        browser = self._session_manager.get_sync_browser(thread_id)
        page = get_current_page(browser)
        return page

    def _is_in_asyncio_loop(self) -> bool:
        """Check if we're currently inside a *running* asyncio event loop.

        Uses asyncio.get_running_loop() instead of the deprecated
        get_event_loop(): the latter may implicitly create a fresh,
        non-running loop (and warns on modern Python), which made the
        previous check both noisy and potentially wasteful.
        """
        try:
            asyncio.get_running_loop()
            return True
        except RuntimeError:
            return False
|
||||
|
||||
|
||||
# Tool classes
|
||||
class NavigateTool(BrowserBaseTool):
    """Tool for navigating a browser to a URL.

    Only http/https URLs are accepted; all outcomes (including errors)
    are reported as human-readable strings rather than raised.
    """

    name: str = "navigate_browser"
    description: str = "Navigate a browser to the specified URL"
    args_schema: Type[BaseModel] = NavigateToolInput

    def _run(self, url: str, thread_id: str = "default", **kwargs) -> str:
        """Navigate the sync browser for *thread_id* to *url*."""
        try:
            # Validate the URL scheme BEFORE acquiring a page, so a bad URL
            # never spins up (or touches) a remote browser session.
            parsed_url = urlparse(url)
            if parsed_url.scheme not in ("http", "https"):
                raise ValueError("URL scheme must be 'http' or 'https'")

            # Get page for this thread
            page = self.get_sync_page(thread_id)

            # Navigate to URL
            response = page.goto(url)
            status = response.status if response else "unknown"
            return f"Navigating to {url} returned status code {status}"
        except Exception as e:
            return f"Error navigating to {url}: {str(e)}"

    async def _arun(self, url: str, thread_id: str = "default", **kwargs) -> str:
        """Navigate the async browser for *thread_id* to *url*."""
        try:
            # Validate the URL scheme BEFORE acquiring a page (see _run).
            parsed_url = urlparse(url)
            if parsed_url.scheme not in ("http", "https"):
                raise ValueError("URL scheme must be 'http' or 'https'")

            # Get page for this thread
            page = await self.get_async_page(thread_id)

            # Navigate to URL
            response = await page.goto(url)
            status = response.status if response else "unknown"
            return f"Navigating to {url} returned status code {status}"
        except Exception as e:
            return f"Error navigating to {url}: {str(e)}"
|
||||
|
||||
|
||||
class ClickTool(BrowserBaseTool):
    """Tool that clicks the element matching a CSS selector on the current page."""

    name: str = "click_element"
    description: str = "Click on an element with the given CSS selector"
    args_schema: Type[BaseModel] = ClickToolInput

    # Whether to consider only visible elements.
    visible_only: bool = True
    # Whether to employ Playwright's strict mode when clicking on elements.
    playwright_strict: bool = False
    # Timeout (in ms) for Playwright to wait for element to be ready.
    playwright_timeout: float = 1_000

    def _selector_effective(self, selector: str) -> str:
        """Append Playwright's visibility filter when visible_only is set."""
        return f"{selector} >> visible=1" if self.visible_only else selector

    def _run(self, selector: str, thread_id: str = "default", **kwargs) -> str:
        """Click the matching element via the sync browser session."""
        try:
            page = self.get_sync_page(thread_id)
            target = self._selector_effective(selector=selector)
            from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

            try:
                page.click(
                    target,
                    strict=self.playwright_strict,
                    timeout=self.playwright_timeout,
                )
            except PlaywrightTimeoutError:
                return f"Unable to click on element '{selector}'"
            except Exception as click_error:
                return f"Unable to click on element '{selector}': {str(click_error)}"

            return f"Clicked element '{selector}'"
        except Exception as e:
            return f"Error clicking on element: {str(e)}"

    async def _arun(self, selector: str, thread_id: str = "default", **kwargs) -> str:
        """Click the matching element via the async browser session."""
        try:
            page = await self.get_async_page(thread_id)
            target = self._selector_effective(selector=selector)
            from playwright.async_api import TimeoutError as PlaywrightTimeoutError

            try:
                await page.click(
                    target,
                    strict=self.playwright_strict,
                    timeout=self.playwright_timeout,
                )
            except PlaywrightTimeoutError:
                return f"Unable to click on element '{selector}'"
            except Exception as click_error:
                return f"Unable to click on element '{selector}': {str(click_error)}"

            return f"Clicked element '{selector}'"
        except Exception as e:
            return f"Error clicking on element: {str(e)}"
|
||||
|
||||
|
||||
class NavigateBackTool(BrowserBaseTool):
    """Tool that moves the browser one entry back in its history."""
    name: str = "navigate_back"
    description: str = "Navigate back to the previous page"
    args_schema: Type[BaseModel] = NavigateBackToolInput

    def _run(self, thread_id: str = "default", **kwargs) -> str:
        """Go back one page in the sync browser's history."""
        try:
            current = self.get_sync_page(thread_id)
            try:
                current.go_back()
            except Exception as nav_error:
                return f"Unable to navigate back: {str(nav_error)}"
            return "Navigated back to the previous page"
        except Exception as e:
            return f"Error navigating back: {str(e)}"

    async def _arun(self, thread_id: str = "default", **kwargs) -> str:
        """Go back one page in the async browser's history."""
        try:
            current = await self.get_async_page(thread_id)
            try:
                await current.go_back()
            except Exception as nav_error:
                return f"Unable to navigate back: {str(nav_error)}"
            return "Navigated back to the previous page"
        except Exception as e:
            return f"Error navigating back: {str(e)}"
|
||||
|
||||
|
||||
class ExtractTextTool(BrowserBaseTool):
    """Tool that returns the full visible text of the current page."""
    name: str = "extract_text"
    description: str = "Extract all the text on the current webpage"
    args_schema: Type[BaseModel] = ExtractTextToolInput

    def _run(self, thread_id: str = "default", **kwargs) -> str:
        """Extract all text from the current page (sync)."""
        try:
            try:
                from bs4 import BeautifulSoup
            except ImportError:
                return (
                    "The 'beautifulsoup4' package is required to use this tool."
                    " Please install it with 'pip install beautifulsoup4'."
                )

            html = self.get_sync_page(thread_id).content()
            parsed = BeautifulSoup(html, "html.parser")
            return parsed.get_text(separator="\n").strip()
        except Exception as e:
            return f"Error extracting text: {str(e)}"

    async def _arun(self, thread_id: str = "default", **kwargs) -> str:
        """Extract all text from the current page (async)."""
        try:
            try:
                from bs4 import BeautifulSoup
            except ImportError:
                return (
                    "The 'beautifulsoup4' package is required to use this tool."
                    " Please install it with 'pip install beautifulsoup4'."
                )

            page = await self.get_async_page(thread_id)
            html = await page.content()
            parsed = BeautifulSoup(html, "html.parser")
            return parsed.get_text(separator="\n").strip()
        except Exception as e:
            return f"Error extracting text: {str(e)}"
|
||||
|
||||
|
||||
class ExtractHyperlinksTool(BrowserBaseTool):
    """Tool for extracting hyperlinks from a webpage.

    Returns a JSON array of {"text", "url"} objects for every absolute
    http(s) anchor on the page; relative links are skipped.
    """
    name: str = "extract_hyperlinks"
    description: str = "Extract all hyperlinks on the current webpage"
    args_schema: Type[BaseModel] = ExtractHyperlinksToolInput

    def _run(self, thread_id: str = "default", **kwargs) -> str:
        """Use the sync tool."""
        try:
            try:
                from bs4 import BeautifulSoup
            except ImportError:
                return (
                    "The 'beautifulsoup4' package is required to use this tool."
                    " Please install it with 'pip install beautifulsoup4'."
                )

            page = self.get_sync_page(thread_id)
            content = page.content()
            soup = BeautifulSoup(content, "html.parser")
            links = []
            for link in soup.find_all("a", href=True):
                text = link.get_text().strip()
                href = link["href"]
                # Keep absolute links only. startswith("http") already
                # matches "https" URLs, so the former extra
                # startswith("https") check was dead code and is removed.
                if href.startswith("http"):
                    links.append({"text": text, "url": href})

            if not links:
                return "No hyperlinks found on the current page."

            return json.dumps(links, indent=2)
        except Exception as e:
            return f"Error extracting hyperlinks: {str(e)}"

    async def _arun(self, thread_id: str = "default", **kwargs) -> str:
        """Use the async tool."""
        try:
            try:
                from bs4 import BeautifulSoup
            except ImportError:
                return (
                    "The 'beautifulsoup4' package is required to use this tool."
                    " Please install it with 'pip install beautifulsoup4'."
                )

            page = await self.get_async_page(thread_id)
            content = await page.content()
            soup = BeautifulSoup(content, "html.parser")
            links = []
            for link in soup.find_all("a", href=True):
                text = link.get_text().strip()
                href = link["href"]
                # Absolute links only; see sync variant for rationale.
                if href.startswith("http"):
                    links.append({"text": text, "url": href})

            if not links:
                return "No hyperlinks found on the current page."

            return json.dumps(links, indent=2)
        except Exception as e:
            return f"Error extracting hyperlinks: {str(e)}"
|
||||
|
||||
|
||||
class GetElementsTool(BrowserBaseTool):
    """Tool for getting elements from a webpage.

    Returns one numbered line of text content per matching element.
    """
    name: str = "get_elements"
    description: str = "Get elements from the webpage using a CSS selector"
    args_schema: Type[BaseModel] = GetElementsToolInput

    def _run(self, selector: str, thread_id: str = "default", **kwargs) -> str:
        """Use the sync tool."""
        try:
            page = self.get_sync_page(thread_id)

            elements = page.query_selector_all(selector)
            if not elements:
                return f"No elements found with selector '{selector}'"

            elements_text = []
            for i, element in enumerate(elements):
                # text_content() may return None for some nodes; guard so a
                # single such element does not abort the whole extraction
                # with an AttributeError.
                text = element.text_content() or ""
                elements_text.append(f"Element {i+1}: {text.strip()}")

            return "\n".join(elements_text)
        except Exception as e:
            return f"Error getting elements: {str(e)}"

    async def _arun(self, selector: str, thread_id: str = "default", **kwargs) -> str:
        """Use the async tool."""
        try:
            page = await self.get_async_page(thread_id)

            elements = await page.query_selector_all(selector)
            if not elements:
                return f"No elements found with selector '{selector}'"

            elements_text = []
            for i, element in enumerate(elements):
                # Same None guard as the sync variant.
                text = await element.text_content() or ""
                elements_text.append(f"Element {i+1}: {text.strip()}")

            return "\n".join(elements_text)
        except Exception as e:
            return f"Error getting elements: {str(e)}"
|
||||
|
||||
|
||||
class CurrentWebPageTool(BrowserBaseTool):
    """Tool reporting the URL and title of the current page."""
    name: str = "current_webpage"
    description: str = "Get information about the current webpage"
    args_schema: Type[BaseModel] = CurrentWebPageToolInput

    def _run(self, thread_id: str = "default", **kwargs) -> str:
        """Report the sync session's current URL and title."""
        try:
            active = self.get_sync_page(thread_id)
            return f"URL: {active.url}\nTitle: {active.title()}"
        except Exception as e:
            return f"Error getting current webpage info: {str(e)}"

    async def _arun(self, thread_id: str = "default", **kwargs) -> str:
        """Report the async session's current URL and title."""
        try:
            active = await self.get_async_page(thread_id)
            title = await active.title()
            return f"URL: {active.url}\nTitle: {title}"
        except Exception as e:
            return f"Error getting current webpage info: {str(e)}"
|
||||
|
||||
|
||||
class BrowserToolkit:
    """Toolkit for navigating web with AWS Bedrock browser.

    This toolkit provides a set of tools for working with a remote browser
    and supports multiple threads by maintaining separate browser sessions
    for each thread ID. Browsers are created lazily only when needed.

    Example:
        ```python
        from crewai import Agent, Task, Crew
        from crewai_tools.aws.bedrock.browser import create_browser_toolkit

        # Create the browser toolkit
        toolkit, browser_tools = create_browser_toolkit(region="us-west-2")

        # Create a CrewAI agent that uses the browser tools
        research_agent = Agent(
            role="Web Researcher",
            goal="Research and summarize web content",
            backstory="You're an expert at finding information online.",
            tools=browser_tools
        )

        # Create a task for the agent
        research_task = Task(
            description="Navigate to https://example.com and extract all text content. Summarize the main points.",
            agent=research_agent
        )

        # Create and run the crew
        crew = Crew(
            agents=[research_agent],
            tasks=[research_task]
        )
        result = crew.kickoff()

        # Clean up browser resources when done
        import asyncio
        asyncio.run(toolkit.cleanup())
        ```
    """

    def __init__(self, region: str = "us-west-2"):
        """
        Initialize the toolkit

        Args:
            region: AWS region for the browser client
        """
        self.region = region
        self.session_manager = BrowserSessionManager(region=region)
        self.tools: List[BaseTool] = []
        # Reference to a cleanup task scheduled by sync_cleanup, kept so the
        # event loop cannot garbage-collect it before it finishes.
        self._cleanup_task = None
        self._nest_current_loop()
        self._setup_tools()

    def _nest_current_loop(self):
        """Apply nest_asyncio if we're inside a running asyncio loop."""
        try:
            # get_running_loop() raises when no loop is running, avoiding the
            # deprecated get_event_loop() which may create a new loop.
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return
        try:
            import nest_asyncio
            nest_asyncio.apply(loop)
        except Exception as e:
            logger.warning(f"Failed to apply nest_asyncio: {str(e)}")

    def _setup_tools(self) -> None:
        """Initialize tools without creating any browsers."""
        self.tools = [
            NavigateTool(session_manager=self.session_manager),
            ClickTool(session_manager=self.session_manager),
            NavigateBackTool(session_manager=self.session_manager),
            ExtractTextTool(session_manager=self.session_manager),
            ExtractHyperlinksTool(session_manager=self.session_manager),
            GetElementsTool(session_manager=self.session_manager),
            CurrentWebPageTool(session_manager=self.session_manager)
        ]

    def get_tools(self) -> List[BaseTool]:
        """
        Get the list of browser tools

        Returns:
            List of CrewAI tools
        """
        return self.tools

    def get_tools_by_name(self) -> Dict[str, BaseTool]:
        """
        Get a dictionary of tools mapped by their names

        Returns:
            Dictionary of {tool_name: tool}
        """
        return {tool.name: tool for tool in self.tools}

    async def cleanup(self) -> None:
        """Clean up all browser sessions asynchronously"""
        await self.session_manager.close_all_browsers()
        logger.info("All browser sessions cleaned up")

    def sync_cleanup(self) -> None:
        """Clean up all browser sessions from synchronous code"""
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: drive cleanup to completion on a fresh loop.
            asyncio.run(self.cleanup())
            return
        # Inside a running loop: schedule cleanup and keep a strong
        # reference — asyncio only holds weak references to tasks, so an
        # unreferenced task could be garbage-collected before completing.
        self._cleanup_task = loop.create_task(self.cleanup())
|
||||
|
||||
|
||||
def create_browser_toolkit(
    region: str = "us-west-2",
) -> Tuple[BrowserToolkit, List[BaseTool]]:
    """
    Build a BrowserToolkit for the given AWS region.

    Args:
        region: AWS region for browser client

    Returns:
        Tuple of (toolkit, tools)
    """
    kit = BrowserToolkit(region=region)
    return kit, kit.get_tools()
|
||||
43
crewai_tools/aws/bedrock/browser/utils.py
Normal file
43
crewai_tools/aws/bedrock/browser/utils.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Union
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from playwright.async_api import Browser as AsyncBrowser
|
||||
from playwright.async_api import Page as AsyncPage
|
||||
from playwright.sync_api import Browser as SyncBrowser
|
||||
from playwright.sync_api import Page as SyncPage
|
||||
|
||||
|
||||
async def aget_current_page(browser: Union[AsyncBrowser, Any]) -> AsyncPage:
    """
    Asynchronously get the current page of the browser.

    Reuses the most recent page of the first context when one exists;
    otherwise lazily creates the needed context and/or page.

    Args:
        browser: The browser (AsyncBrowser) to get the current page from.
    Returns:
        AsyncPage: The current page.
    """
    existing_contexts = browser.contexts
    if not existing_contexts:
        fresh_context = await browser.new_context()
        return await fresh_context.new_page()
    first_context = existing_contexts[0]
    if first_context.pages:
        return first_context.pages[-1]
    return await first_context.new_page()
|
||||
|
||||
|
||||
def get_current_page(browser: Union[SyncBrowser, Any]) -> SyncPage:
    """
    Get the current page of the browser.

    Reuses the most recent page of the first context when one exists;
    otherwise lazily creates the needed context and/or page.

    Args:
        browser: The browser to get the current page from.
    Returns:
        SyncPage: The current page.
    """
    existing_contexts = browser.contexts
    if not existing_contexts:
        return browser.new_context().new_page()
    first_context = existing_contexts[0]
    if first_context.pages:
        return first_context.pages[-1]
    return first_context.new_page()
|
||||
217
crewai_tools/aws/bedrock/code_interpreter/README.md
Normal file
217
crewai_tools/aws/bedrock/code_interpreter/README.md
Normal file
@@ -0,0 +1,217 @@
|
||||
# AWS Bedrock Code Interpreter Tools
|
||||
|
||||
This toolkit provides a set of tools for interacting with the AWS Bedrock Code Interpreter environment. It enables your CrewAI agents to execute code, run shell commands, manage files, and perform computational tasks in a secure, isolated environment.
|
||||
|
||||
## Features
|
||||
|
||||
- Execute code in various languages (primarily Python)
|
||||
- Run shell commands in the environment
|
||||
- Read, write, list, and delete files
|
||||
- Manage long-running tasks asynchronously
|
||||
- Multiple code interpreter sessions with thread-based isolation
|
||||
|
||||
## Installation
|
||||
|
||||
Ensure you have the necessary dependencies:
|
||||
|
||||
```bash
|
||||
uv add crewai-tools bedrock-agentcore
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew, LLM
|
||||
from crewai_tools.aws import create_code_interpreter_toolkit
|
||||
|
||||
# Create the code interpreter toolkit
|
||||
toolkit, code_tools = create_code_interpreter_toolkit(region="us-west-2")
|
||||
|
||||
# Create the Bedrock LLM
|
||||
llm = LLM(
|
||||
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
region_name="us-west-2",
|
||||
)
|
||||
|
||||
# Create a CrewAI agent that uses the code interpreter tools
|
||||
developer_agent = Agent(
|
||||
role="Python Developer",
|
||||
goal="Create and execute Python code to solve problems.",
|
||||
backstory="You're a skilled Python developer with expertise in data analysis.",
|
||||
tools=code_tools,
|
||||
llm=llm
|
||||
)
|
||||
|
||||
# Create a task for the agent
|
||||
coding_task = Task(
|
||||
description="Write a Python function that calculates the factorial of a number and test it. Do not use any imports from outside the Python standard library.",
|
||||
expected_output="The Python function created, and the test results.",
|
||||
agent=developer_agent
|
||||
)
|
||||
|
||||
# Create and run the crew
|
||||
crew = Crew(
|
||||
agents=[developer_agent],
|
||||
tasks=[coding_task]
|
||||
)
|
||||
result = crew.kickoff()
|
||||
|
||||
print(f"\n***Final result:***\n\n{result}")
|
||||
|
||||
# Clean up resources when done
|
||||
import asyncio
|
||||
asyncio.run(toolkit.cleanup())
|
||||
```
|
||||
|
||||
### Available Tools
|
||||
|
||||
The toolkit provides the following tools:
|
||||
|
||||
1. `execute_code` - Run code in various languages (primarily Python)
|
||||
2. `execute_command` - Run shell commands in the environment
|
||||
3. `read_files` - Read content of files in the environment
|
||||
4. `list_files` - List files in directories
|
||||
5. `delete_files` - Remove files from the environment
|
||||
6. `write_files` - Create or update files
|
||||
7. `start_command_execution` - Start long-running commands asynchronously
|
||||
8. `get_task` - Check status of async tasks
|
||||
9. `stop_task` - Stop running tasks
|
||||
|
||||
### Advanced Usage
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew, LLM
|
||||
from crewai_tools.aws import create_code_interpreter_toolkit
|
||||
|
||||
# Create the code interpreter toolkit
|
||||
toolkit, code_tools = create_code_interpreter_toolkit(region="us-west-2")
|
||||
tools_by_name = toolkit.get_tools_by_name()
|
||||
|
||||
# Create the Bedrock LLM
|
||||
llm = LLM(
|
||||
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
region_name="us-west-2",
|
||||
)
|
||||
|
||||
# Create agents with specific tools
|
||||
code_agent = Agent(
|
||||
role="Code Developer",
|
||||
goal="Write and execute code",
|
||||
backstory="You write and test code to solve complex problems.",
|
||||
tools=[
|
||||
# Use specific tools by name
|
||||
tools_by_name["execute_code"],
|
||||
tools_by_name["execute_command"],
|
||||
tools_by_name["read_files"],
|
||||
tools_by_name["write_files"]
|
||||
],
|
||||
llm=llm
|
||||
)
|
||||
|
||||
file_agent = Agent(
|
||||
role="File Manager",
|
||||
goal="Manage files in the environment",
|
||||
backstory="You help organize and manage files in the code environment.",
|
||||
tools=[
|
||||
# Use specific tools by name
|
||||
tools_by_name["list_files"],
|
||||
tools_by_name["read_files"],
|
||||
tools_by_name["write_files"],
|
||||
tools_by_name["delete_files"]
|
||||
],
|
||||
llm=llm
|
||||
)
|
||||
|
||||
# Create tasks for the agents
|
||||
coding_task = Task(
|
||||
description="Write a Python script to analyze data from a CSV file. Do not use any imports from outside the Python standard library.",
|
||||
expected_output="The Python function created.",
|
||||
agent=code_agent
|
||||
)
|
||||
|
||||
file_task = Task(
|
||||
description="Organize the created files into separate directories.",
|
||||
agent=file_agent
|
||||
)
|
||||
|
||||
# Create and run the crew
|
||||
crew = Crew(
|
||||
agents=[code_agent, file_agent],
|
||||
tasks=[coding_task, file_task]
|
||||
)
|
||||
result = crew.kickoff()
|
||||
|
||||
print(f"\n***Final result:***\n\n{result}")
|
||||
|
||||
# Clean up code interpreter resources when done
|
||||
import asyncio
|
||||
asyncio.run(toolkit.cleanup())
|
||||
```
|
||||
|
||||
### Example: Data Analysis with Python
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew, LLM
|
||||
from crewai_tools.aws import create_code_interpreter_toolkit
|
||||
|
||||
# Create toolkit and tools
|
||||
toolkit, code_tools = create_code_interpreter_toolkit(region="us-west-2")
|
||||
|
||||
# Create the Bedrock LLM
|
||||
llm = LLM(
|
||||
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
region_name="us-west-2",
|
||||
)
|
||||
|
||||
# Create a data analyst agent
|
||||
analyst_agent = Agent(
|
||||
role="Data Analyst",
|
||||
goal="Analyze data using Python",
|
||||
backstory="You're an expert data analyst who uses Python for data processing.",
|
||||
tools=code_tools,
|
||||
llm=llm
|
||||
)
|
||||
|
||||
# Create a task for the agent
|
||||
analysis_task = Task(
|
||||
description="""
|
||||
For all of the below, do not use any imports from outside the Python standard library.
|
||||
1. Create a sample dataset with random data
|
||||
2. Perform statistical analysis on the dataset
|
||||
3. Generate visualizations of the results
|
||||
4. Save the results and visualizations to files
|
||||
""",
|
||||
agent=analyst_agent
|
||||
)
|
||||
|
||||
# Create and run the crew
|
||||
crew = Crew(
|
||||
agents=[analyst_agent],
|
||||
tasks=[analysis_task]
|
||||
)
|
||||
result = crew.kickoff()
|
||||
|
||||
print(f"\n***Final result:***\n\n{result}")
|
||||
|
||||
# Clean up resources
|
||||
import asyncio
|
||||
asyncio.run(toolkit.cleanup())
|
||||
```
|
||||
|
||||
## Resource Cleanup
|
||||
|
||||
Always clean up code interpreter resources when done to prevent resource leaks:
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
|
||||
# Clean up all code interpreter sessions
|
||||
asyncio.run(toolkit.cleanup())
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- AWS account with access to Bedrock AgentCore API
|
||||
- Properly configured AWS credentials
|
||||
3
crewai_tools/aws/bedrock/code_interpreter/__init__.py
Normal file
3
crewai_tools/aws/bedrock/code_interpreter/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .code_interpreter_toolkit import CodeInterpreterToolkit, create_code_interpreter_toolkit
|
||||
|
||||
__all__ = ["CodeInterpreterToolkit", "create_code_interpreter_toolkit"]
|
||||
@@ -0,0 +1,543 @@
|
||||
"""Toolkit for working with AWS Bedrock Code Interpreter."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, Type, Any
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bedrock_agentcore.tools.code_interpreter_client import CodeInterpreter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_output_from_stream(response):
    """
    Extract output from code interpreter response stream

    Text items are appended verbatim; resource items with inline text are
    rendered as a "==== File: path ====" section, other resources are
    serialized as JSON. Events without a "result" key are ignored.

    Args:
        response: Response from code interpreter execution

    Returns:
        Extracted output as string
    """
    pieces = []
    for event in response["stream"]:
        if "result" not in event:
            continue
        for item in event["result"]["content"]:
            if item["type"] == "text":
                pieces.append(item["text"])
            if item["type"] == "resource":
                res = item["resource"]
                if "text" in res:
                    path = res["uri"].replace("file://", "")
                    pieces.append(f"==== File: {path} ====\n{res['text']}\n")
                else:
                    pieces.append(json.dumps(res))

    return "\n".join(pieces)
|
||||
|
||||
|
||||
# Input schemas
|
||||
class ExecuteCodeInput(BaseModel):
    """Input for ExecuteCode.

    code: source to run; language defaults to Python.
    clear_context: when True, resets the interpreter's execution context.
    thread_id: selects which per-thread interpreter session to use.
    """
    code: str = Field(description="The code to execute")
    language: str = Field(default="python", description="The programming language of the code")
    clear_context: bool = Field(default=False, description="Whether to clear execution context")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class ExecuteCommandInput(BaseModel):
    """Input for ExecuteCommand: a shell command plus the session thread ID."""
    command: str = Field(description="The command to execute")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class ReadFilesInput(BaseModel):
    """Input for ReadFiles: file paths to read plus the session thread ID."""
    paths: List[str] = Field(description="List of file paths to read")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class ListFilesInput(BaseModel):
    """Input for ListFiles: directory to list (empty string for the default) plus the session thread ID."""
    directory_path: str = Field(default="", description="Path to the directory to list")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class DeleteFilesInput(BaseModel):
    """Input for DeleteFiles: file paths to remove plus the session thread ID."""
    paths: List[str] = Field(description="List of file paths to delete")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class WriteFilesInput(BaseModel):
    """Input for WriteFiles: each entry is a dict with "path" and "text" keys, plus the session thread ID."""
    files: List[Dict[str, str]] = Field(description="List of dictionaries with path and text fields")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class StartCommandInput(BaseModel):
    """Input for StartCommand: a shell command to launch asynchronously plus the session thread ID."""
    command: str = Field(description="The command to execute asynchronously")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class GetTaskInput(BaseModel):
    """Input for GetTask: the async task ID to poll plus the session thread ID."""
    task_id: str = Field(description="The ID of the task to check")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
class StopTaskInput(BaseModel):
    """Input for StopTask: the async task ID to cancel plus the session thread ID."""
    task_id: str = Field(description="The ID of the task to stop")
    thread_id: str = Field(default="default", description="Thread ID for the code interpreter session")
|
||||
|
||||
|
||||
# Tool classes
|
||||
class ExecuteCodeTool(BaseTool):
    """Run source code (primarily Python) inside the remote interpreter session."""

    name: str = "execute_code"
    description: str = "Execute code in various languages (primarily Python)"
    args_schema: Type[BaseModel] = ExecuteCodeInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, code: str, language: str = "python", clear_context: bool = False, thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            payload = {"code": code, "language": language, "clearContext": clear_context}
            stream = interpreter.invoke(method="executeCode", params=payload)
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error executing code: {str(e)}"

    async def _arun(self, code: str, language: str = "python", clear_context: bool = False, thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(code=code, language=language, clear_context=clear_context, thread_id=thread_id)
|
||||
|
||||
|
||||
class ExecuteCommandTool(BaseTool):
    """Run a shell command synchronously inside the remote interpreter session."""

    name: str = "execute_command"
    description: str = "Run shell commands in the code interpreter environment"
    args_schema: Type[BaseModel] = ExecuteCommandInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, command: str, thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            stream = interpreter.invoke(method="executeCommand", params={"command": command})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error executing command: {str(e)}"

    async def _arun(self, command: str, thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(command=command, thread_id=thread_id)
|
||||
|
||||
|
||||
class ReadFilesTool(BaseTool):
    """Fetch the contents of one or more files from the interpreter environment."""

    name: str = "read_files"
    description: str = "Read content of files in the environment"
    args_schema: Type[BaseModel] = ReadFilesInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, paths: List[str], thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            stream = interpreter.invoke(method="readFiles", params={"paths": paths})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error reading files: {str(e)}"

    async def _arun(self, paths: List[str], thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(paths=paths, thread_id=thread_id)
|
||||
|
||||
|
||||
class ListFilesTool(BaseTool):
    """Enumerate files under a directory in the interpreter environment."""

    name: str = "list_files"
    description: str = "List files in directories in the environment"
    args_schema: Type[BaseModel] = ListFilesInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, directory_path: str = "", thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            stream = interpreter.invoke(method="listFiles", params={"directoryPath": directory_path})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error listing files: {str(e)}"

    async def _arun(self, directory_path: str = "", thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(directory_path=directory_path, thread_id=thread_id)
|
||||
|
||||
|
||||
class DeleteFilesTool(BaseTool):
    """Delete files from the interpreter environment."""

    name: str = "delete_files"
    description: str = "Remove files from the environment"
    args_schema: Type[BaseModel] = DeleteFilesInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, paths: List[str], thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            # The sandbox API names this operation "removeFiles".
            stream = interpreter.invoke(method="removeFiles", params={"paths": paths})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error deleting files: {str(e)}"

    async def _arun(self, paths: List[str], thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(paths=paths, thread_id=thread_id)
|
||||
|
||||
|
||||
class WriteFilesTool(BaseTool):
    """Create or overwrite files in the interpreter environment."""

    name: str = "write_files"
    description: str = "Create or update files in the environment"
    args_schema: Type[BaseModel] = WriteFilesInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, files: List[Dict[str, str]], thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            # writeFiles expects the file list under the "content" key.
            stream = interpreter.invoke(method="writeFiles", params={"content": files})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error writing files: {str(e)}"

    async def _arun(self, files: List[Dict[str, str]], thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(files=files, thread_id=thread_id)
|
||||
|
||||
|
||||
class StartCommandTool(BaseTool):
    """Kick off a long-running shell command without waiting for it to finish."""

    name: str = "start_command_execution"
    description: str = "Start long-running commands asynchronously"
    args_schema: Type[BaseModel] = StartCommandInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, command: str, thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            stream = interpreter.invoke(method="startCommandExecution", params={"command": command})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error starting command: {str(e)}"

    async def _arun(self, command: str, thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(command=command, thread_id=thread_id)
|
||||
|
||||
|
||||
class GetTaskTool(BaseTool):
    """Query the status of a task started with start_command_execution."""

    name: str = "get_task"
    description: str = "Check status of async tasks"
    args_schema: Type[BaseModel] = GetTaskInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, task_id: str, thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            stream = interpreter.invoke(method="getTask", params={"taskId": task_id})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error getting task status: {str(e)}"

    async def _arun(self, task_id: str, thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(task_id=task_id, thread_id=thread_id)
|
||||
|
||||
|
||||
class StopTaskTool(BaseTool):
    """Cancel a running asynchronous task."""

    name: str = "stop_task"
    description: str = "Stop running tasks"
    args_schema: Type[BaseModel] = StopTaskInput
    toolkit: Any = Field(default=None, exclude=True)

    def __init__(self, toolkit):
        super().__init__()
        self.toolkit = toolkit

    def _run(self, task_id: str, thread_id: str = "default") -> str:
        try:
            # Lazily obtain (or create) the interpreter session for this thread.
            interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id)
            stream = interpreter.invoke(method="stopTask", params={"taskId": task_id})
            return extract_output_from_stream(stream)
        except Exception as e:
            return f"Error stopping task: {str(e)}"

    async def _arun(self, task_id: str, thread_id: str = "default") -> str:
        # The underlying API is synchronous and thread-safe; delegate to _run.
        return self._run(task_id=task_id, thread_id=thread_id)
|
||||
|
||||
|
||||
class CodeInterpreterToolkit:
    """Toolkit for working with AWS Bedrock code interpreter environment.

    This toolkit provides a set of tools for working with a remote code interpreter environment:

    * execute_code - Run code in various languages (primarily Python)
    * execute_command - Run shell commands
    * read_files - Read content of files in the environment
    * list_files - List files in directories
    * delete_files - Remove files from the environment
    * write_files - Create or update files
    * start_command_execution - Start long-running commands asynchronously
    * get_task - Check status of async tasks
    * stop_task - Stop running tasks

    The toolkit lazily initializes the code interpreter session on first use.
    It supports multiple threads by maintaining separate code interpreter sessions for each thread ID.

    Example:
        ```python
        from crewai import Agent, Task, Crew
        from crewai_tools.aws.bedrock.code_interpreter import create_code_interpreter_toolkit

        # Create the code interpreter toolkit
        toolkit, code_tools = create_code_interpreter_toolkit(region="us-west-2")

        # Create a CrewAI agent that uses the code interpreter tools
        developer_agent = Agent(
            role="Python Developer",
            goal="Create and execute Python code to solve problems",
            backstory="You're a skilled Python developer with expertise in data analysis.",
            tools=code_tools
        )

        # Create a task for the agent
        coding_task = Task(
            description="Write a Python function that calculates the factorial of a number and test it.",
            agent=developer_agent
        )

        # Create and run the crew
        crew = Crew(
            agents=[developer_agent],
            tasks=[coding_task]
        )
        result = crew.kickoff()

        # Clean up resources when done
        import asyncio
        asyncio.run(toolkit.cleanup())
        ```
    """

    def __init__(self, region: str = "us-west-2"):
        """
        Initialize the toolkit

        Args:
            region: AWS region for the code interpreter
        """
        self.region = region
        # One live interpreter session per thread ID, created lazily on first use.
        self._code_interpreters: Dict[str, CodeInterpreter] = {}
        self.tools: List[BaseTool] = []
        self._setup_tools()

    def _setup_tools(self) -> None:
        """Initialize tools without creating any code interpreter sessions."""
        # Each tool holds a back-reference to this toolkit so it can reach the
        # shared per-thread session cache via _get_or_create_interpreter.
        self.tools = [
            ExecuteCodeTool(self),
            ExecuteCommandTool(self),
            ReadFilesTool(self),
            ListFilesTool(self),
            DeleteFilesTool(self),
            WriteFilesTool(self),
            StartCommandTool(self),
            GetTaskTool(self),
            StopTaskTool(self)
        ]

    def _get_or_create_interpreter(
        self, thread_id: str = "default"
    ) -> CodeInterpreter:
        """Get or create a code interpreter for the specified thread.

        Args:
            thread_id: Thread ID for the code interpreter session

        Returns:
            CodeInterpreter instance
        """
        # Fast path: reuse an already-started session for this thread.
        if thread_id in self._code_interpreters:
            return self._code_interpreters[thread_id]

        # Create a new code interpreter for this thread
        # NOTE(review): local import presumably defers the bedrock_agentcore
        # dependency until a session is actually needed — confirm whether a
        # top-level import already exists for the annotations above.
        from bedrock_agentcore.tools.code_interpreter_client import CodeInterpreter
        code_interpreter = CodeInterpreter(region=self.region)
        code_interpreter.start()
        logger.info(
            f"Started code interpreter with session_id:{code_interpreter.session_id} for thread:{thread_id}"
        )

        # Store the interpreter
        self._code_interpreters[thread_id] = code_interpreter
        return code_interpreter

    def get_tools(self) -> List[BaseTool]:
        """
        Get the list of code interpreter tools

        Returns:
            List of CrewAI tools
        """
        return self.tools

    def get_tools_by_name(self) -> Dict[str, BaseTool]:
        """
        Get a dictionary of tools mapped by their names

        Returns:
            Dictionary of {tool_name: tool}
        """
        return {tool.name: tool for tool in self.tools}

    async def cleanup(self, thread_id: Optional[str] = None) -> None:
        """Clean up resources

        Args:
            thread_id: Optional thread ID to clean up. If None, cleans up all sessions.
        """
        if thread_id:
            # Clean up a specific thread's session
            if thread_id in self._code_interpreters:
                try:
                    self._code_interpreters[thread_id].stop()
                    # NOTE(review): if stop() raises, the entry is left in the
                    # cache (del is skipped), unlike the clean-all branch below
                    # which always resets the dict — confirm this is intended.
                    del self._code_interpreters[thread_id]
                    logger.info(
                        f"Code interpreter session for thread {thread_id} cleaned up"
                    )
                except Exception as e:
                    logger.warning(
                        f"Error stopping code interpreter for thread {thread_id}: {e}"
                    )
        else:
            # Clean up all sessions
            thread_ids = list(self._code_interpreters.keys())
            for tid in thread_ids:
                try:
                    self._code_interpreters[tid].stop()
                except Exception as e:
                    logger.warning(
                        f"Error stopping code interpreter for thread {tid}: {e}"
                    )

            # Drop every cached session even if some stop() calls failed.
            self._code_interpreters = {}
            logger.info("All code interpreter sessions cleaned up")
|
||||
|
||||
|
||||
def create_code_interpreter_toolkit(
    region: str = "us-west-2",
) -> Tuple[CodeInterpreterToolkit, List[BaseTool]]:
    """Build a CodeInterpreterToolkit and return it alongside its tools.

    Args:
        region: AWS region for code interpreter

    Returns:
        Tuple of (toolkit, tools)
    """
    toolkit = CodeInterpreterToolkit(region=region)
    return toolkit, toolkit.get_tools()
|
||||
17
crewai_tools/aws/bedrock/exceptions.py
Normal file
17
crewai_tools/aws/bedrock/exceptions.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Custom exceptions for AWS Bedrock integration."""
|
||||
|
||||
class BedrockError(Exception):
    """Root of the Bedrock exception hierarchy.

    Catching this type handles every error raised by the AWS Bedrock
    integration, regardless of which operation failed.
    """
|
||||
|
||||
class BedrockAgentError(BedrockError):
    """Raised when an Amazon Bedrock Agent operation fails."""
|
||||
|
||||
class BedrockKnowledgeBaseError(BedrockError):
    """Raised when an Amazon Bedrock Knowledge Base operation fails."""
|
||||
|
||||
class BedrockValidationError(BedrockError):
    """Raised when parameters for a Bedrock operation fail validation."""
|
||||
159
crewai_tools/aws/bedrock/knowledge_base/README.md
Normal file
159
crewai_tools/aws/bedrock/knowledge_base/README.md
Normal file
@@ -0,0 +1,159 @@
|
||||
# BedrockKBRetrieverTool
|
||||
|
||||
The `BedrockKBRetrieverTool` enables CrewAI agents to retrieve information from Amazon Bedrock Knowledge Bases using natural language queries.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- AWS credentials configured (either through environment variables or AWS CLI)
|
||||
- `boto3` and `python-dotenv` packages
|
||||
- Access to Amazon Bedrock Knowledge Base
|
||||
|
||||
## Usage
|
||||
|
||||
Here's how to use the tool with a CrewAI agent:
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew
|
||||
from crewai_tools.aws.bedrock.knowledge_base.retriever_tool import BedrockKBRetrieverTool
|
||||
|
||||
# Initialize the tool
|
||||
kb_tool = BedrockKBRetrieverTool(
|
||||
knowledge_base_id="your-kb-id",
|
||||
number_of_results=5
|
||||
)
|
||||
|
||||
# Create a CrewAI agent that uses the tool
|
||||
researcher = Agent(
|
||||
role='Knowledge Base Researcher',
|
||||
goal='Find information about company policies',
|
||||
backstory='I am a researcher specialized in retrieving and analyzing company documentation.',
|
||||
tools=[kb_tool],
|
||||
verbose=True
|
||||
)
|
||||
|
||||
# Create a task for the agent
|
||||
research_task = Task(
|
||||
description="Find our company's remote work policy and summarize the key points.",
|
||||
agent=researcher
|
||||
)
|
||||
|
||||
# Create a crew with the agent
|
||||
crew = Crew(
|
||||
agents=[researcher],
|
||||
tasks=[research_task],
|
||||
verbose=2
|
||||
)
|
||||
|
||||
# Run the crew
|
||||
result = crew.kickoff()
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Tool Arguments
|
||||
|
||||
| Argument | Type | Required | Default | Description |
|
||||
|----------|------|----------|---------|-------------|
|
||||
| knowledge_base_id | str | Yes | None | The unique identifier of the knowledge base (1-10 alphanumeric characters) |
|
||||
| number_of_results | int | No | 5 | Maximum number of results to return |
|
||||
| retrieval_configuration | dict | No | None | Custom configurations for the knowledge base query |
|
||||
| guardrail_configuration | dict | No | None | Content filtering settings |
|
||||
| next_token | str | No | None | Token for pagination |
|
||||
|
||||
## Environment Variables
|
||||
|
||||
```bash
|
||||
BEDROCK_KB_ID=your-knowledge-base-id # Alternative to passing knowledge_base_id
|
||||
AWS_REGION=your-aws-region # Defaults to us-east-1
|
||||
AWS_ACCESS_KEY_ID=your-access-key # Required for AWS authentication
|
||||
AWS_SECRET_ACCESS_KEY=your-secret-key # Required for AWS authentication
|
||||
```
|
||||
|
||||
## Response Format
|
||||
|
||||
The tool returns results in JSON format:
|
||||
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"content": "Retrieved text content",
|
||||
"content_type": "text",
|
||||
"source_type": "S3",
|
||||
"source_uri": "s3://bucket/document.pdf",
|
||||
"score": 0.95,
|
||||
"metadata": {
|
||||
"additional": "metadata"
|
||||
}
|
||||
}
|
||||
],
|
||||
"nextToken": "pagination-token",
|
||||
"guardrailAction": "NONE"
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Custom Retrieval Configuration
|
||||
|
||||
```python
|
||||
kb_tool = BedrockKBRetrieverTool(
|
||||
knowledge_base_id="your-kb-id",
|
||||
retrieval_configuration={
|
||||
"vectorSearchConfiguration": {
|
||||
"numberOfResults": 10,
|
||||
"overrideSearchType": "HYBRID"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
policy_expert = Agent(
|
||||
role='Policy Expert',
|
||||
goal='Analyze company policies in detail',
|
||||
backstory='I am an expert in corporate policy analysis with deep knowledge of regulatory requirements.',
|
||||
tools=[kb_tool]
|
||||
)
|
||||
```
|
||||
|
||||
## Supported Data Sources
|
||||
|
||||
- Amazon S3
|
||||
- Confluence
|
||||
- Salesforce
|
||||
- SharePoint
|
||||
- Web pages
|
||||
- Custom document locations
|
||||
- Amazon Kendra
|
||||
- SQL databases
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Enterprise Knowledge Integration
|
||||
- Enable CrewAI agents to access your organization's proprietary knowledge without exposing sensitive data
|
||||
- Allow agents to make decisions based on your company's specific policies, procedures, and documentation
|
||||
- Create agents that can answer questions based on your internal documentation while maintaining data security
|
||||
|
||||
### Specialized Domain Knowledge
|
||||
- Connect CrewAI agents to domain-specific knowledge bases (legal, medical, technical) without retraining models
|
||||
- Leverage existing knowledge repositories that are already maintained in your AWS environment
|
||||
- Combine CrewAI's reasoning with domain-specific information from your knowledge bases
|
||||
|
||||
### Data-Driven Decision Making
|
||||
- Ground CrewAI agent responses in your actual company data rather than general knowledge
|
||||
- Ensure agents provide recommendations based on your specific business context and documentation
|
||||
- Reduce hallucinations by retrieving factual information from your knowledge bases
|
||||
|
||||
### Scalable Information Access
|
||||
- Access terabytes of organizational knowledge without embedding it all into your models
|
||||
- Dynamically query only the relevant information needed for specific tasks
|
||||
- Leverage AWS's scalable infrastructure to handle large knowledge bases efficiently
|
||||
|
||||
### Compliance and Governance
|
||||
- Ensure CrewAI agents provide responses that align with your company's approved documentation
|
||||
- Create auditable trails of information sources used by your agents
|
||||
- Maintain control over what information sources your agents can access
|
||||
3
crewai_tools/aws/bedrock/knowledge_base/__init__.py
Normal file
3
crewai_tools/aws/bedrock/knowledge_base/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .retriever_tool import BedrockKBRetrieverTool
|
||||
|
||||
__all__ = ["BedrockKBRetrieverTool"]
|
||||
248
crewai_tools/aws/bedrock/knowledge_base/retriever_tool.py
Normal file
248
crewai_tools/aws/bedrock/knowledge_base/retriever_tool.py
Normal file
@@ -0,0 +1,248 @@
|
||||
from typing import Type, Optional, List, Dict, Any
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..exceptions import BedrockKnowledgeBaseError, BedrockValidationError
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class BedrockKBRetrieverToolInput(BaseModel):
    """Input schema for BedrockKBRetrieverTool.

    Attributes:
        query: Natural-language query used to retrieve relevant passages
            from the configured Bedrock Knowledge Base.
    """
    query: str = Field(..., description="The query to retrieve information from the knowledge base")
|
||||
|
||||
|
||||
class BedrockKBRetrieverTool(BaseTool):
    """CrewAI tool that queries an Amazon Bedrock Knowledge Base."""
    # Tool identity presented to the agent; the description is rewritten in
    # __init__ once the concrete knowledge base id is known.
    name: str = "Bedrock Knowledge Base Retriever Tool"
    description: str = "Retrieves information from an Amazon Bedrock Knowledge Base given a query"
    args_schema: Type[BaseModel] = BedrockKBRetrieverToolInput
    # NOTE(review): annotated `str` but defaults to None; the real value is
    # resolved in __init__ (argument or BEDROCK_KB_ID env var) and then
    # validated — consider Optional[str] for accuracy.
    knowledge_base_id: str = None
    number_of_results: Optional[int] = 5
    # Full retrievalConfiguration payload; built from number_of_results in
    # __init__ when not supplied explicitly.
    retrieval_configuration: Optional[Dict[str, Any]] = None
    guardrail_configuration: Optional[Dict[str, Any]] = None
    # Pagination token from a previous retrieve call, if continuing.
    next_token: Optional[str] = None
    package_dependencies: List[str] = ["boto3"]
|
||||
|
||||
    def __init__(
        self,
        knowledge_base_id: str = None,
        number_of_results: Optional[int] = 5,
        retrieval_configuration: Optional[Dict[str, Any]] = None,
        guardrail_configuration: Optional[Dict[str, Any]] = None,
        next_token: Optional[str] = None,
        **kwargs
    ):
        """Initialize the BedrockKBRetrieverTool with knowledge base configuration.

        Args:
            knowledge_base_id (str): The unique identifier of the knowledge base to query;
                falls back to the BEDROCK_KB_ID environment variable when omitted.
            number_of_results (Optional[int], optional): The maximum number of results to return. Defaults to 5.
            retrieval_configuration (Optional[Dict[str, Any]], optional): Configurations for the knowledge base query and retrieval process. Defaults to None.
            guardrail_configuration (Optional[Dict[str, Any]], optional): Guardrail settings. Defaults to None.
            next_token (Optional[str], optional): Token for retrieving the next batch of results. Defaults to None.

        Raises:
            BedrockValidationError: If the resolved parameters fail validation.
        """
        super().__init__(**kwargs)

        # Get knowledge_base_id from environment variable if not provided
        self.knowledge_base_id = knowledge_base_id or os.getenv('BEDROCK_KB_ID')
        self.number_of_results = number_of_results
        self.guardrail_configuration = guardrail_configuration
        self.next_token = next_token

        # Initialize retrieval_configuration with provided parameters or use the one provided
        # (order matters: _build_retrieval_configuration reads self.number_of_results set above)
        if retrieval_configuration is None:
            self.retrieval_configuration = self._build_retrieval_configuration()
        else:
            self.retrieval_configuration = retrieval_configuration

        # Validate parameters
        self._validate_parameters()

        # Update the description to include the knowledge base details
        self.description = f"Retrieves information from Amazon Bedrock Knowledge Base '{self.knowledge_base_id}' given a query"
|
||||
|
||||
def _build_retrieval_configuration(self) -> Dict[str, Any]:
|
||||
"""Build the retrieval configuration based on provided parameters.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: The constructed retrieval configuration
|
||||
"""
|
||||
vector_search_config = {}
|
||||
|
||||
# Add number of results if provided
|
||||
if self.number_of_results is not None:
|
||||
vector_search_config["numberOfResults"] = self.number_of_results
|
||||
|
||||
return {"vectorSearchConfiguration": vector_search_config}
|
||||
|
||||
def _validate_parameters(self):
|
||||
"""Validate the parameters according to AWS API requirements."""
|
||||
try:
|
||||
# Validate knowledge_base_id
|
||||
if not self.knowledge_base_id:
|
||||
raise BedrockValidationError("knowledge_base_id cannot be empty")
|
||||
if not isinstance(self.knowledge_base_id, str):
|
||||
raise BedrockValidationError("knowledge_base_id must be a string")
|
||||
if len(self.knowledge_base_id) > 10:
|
||||
raise BedrockValidationError("knowledge_base_id must be 10 characters or less")
|
||||
if not all(c.isalnum() for c in self.knowledge_base_id):
|
||||
raise BedrockValidationError("knowledge_base_id must contain only alphanumeric characters")
|
||||
|
||||
# Validate next_token if provided
|
||||
if self.next_token:
|
||||
if not isinstance(self.next_token, str):
|
||||
raise BedrockValidationError("next_token must be a string")
|
||||
if len(self.next_token) < 1 or len(self.next_token) > 2048:
|
||||
raise BedrockValidationError("next_token must be between 1 and 2048 characters")
|
||||
if ' ' in self.next_token:
|
||||
raise BedrockValidationError("next_token cannot contain spaces")
|
||||
|
||||
# Validate number_of_results if provided
|
||||
if self.number_of_results is not None:
|
||||
if not isinstance(self.number_of_results, int):
|
||||
raise BedrockValidationError("number_of_results must be an integer")
|
||||
if self.number_of_results < 1:
|
||||
raise BedrockValidationError("number_of_results must be greater than 0")
|
||||
|
||||
except BedrockValidationError as e:
|
||||
raise BedrockValidationError(f"Parameter validation failed: {str(e)}")
|
||||
|
||||
def _process_retrieval_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Process a single retrieval result from Bedrock Knowledge Base.
|
||||
|
||||
Args:
|
||||
result (Dict[str, Any]): Raw result from Bedrock Knowledge Base
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Processed result with standardized format
|
||||
"""
|
||||
# Extract content
|
||||
content_obj = result.get('content', {})
|
||||
content = content_obj.get('text', '')
|
||||
content_type = content_obj.get('type', 'text')
|
||||
|
||||
# Extract location information
|
||||
location = result.get('location', {})
|
||||
location_type = location.get('type', 'unknown')
|
||||
source_uri = None
|
||||
|
||||
# Map for location types and their URI fields
|
||||
location_mapping = {
|
||||
's3Location': {'field': 'uri', 'type': 'S3'},
|
||||
'confluenceLocation': {'field': 'url', 'type': 'Confluence'},
|
||||
'salesforceLocation': {'field': 'url', 'type': 'Salesforce'},
|
||||
'sharePointLocation': {'field': 'url', 'type': 'SharePoint'},
|
||||
'webLocation': {'field': 'url', 'type': 'Web'},
|
||||
'customDocumentLocation': {'field': 'id', 'type': 'CustomDocument'},
|
||||
'kendraDocumentLocation': {'field': 'uri', 'type': 'KendraDocument'},
|
||||
'sqlLocation': {'field': 'query', 'type': 'SQL'}
|
||||
}
|
||||
|
||||
# Extract the URI based on location type
|
||||
for loc_key, config in location_mapping.items():
|
||||
if loc_key in location:
|
||||
source_uri = location[loc_key].get(config['field'])
|
||||
if not location_type or location_type == 'unknown':
|
||||
location_type = config['type']
|
||||
break
|
||||
|
||||
# Create result object
|
||||
result_object = {
|
||||
'content': content,
|
||||
'content_type': content_type,
|
||||
'source_type': location_type,
|
||||
'source_uri': source_uri
|
||||
}
|
||||
|
||||
# Add optional fields if available
|
||||
if 'score' in result:
|
||||
result_object['score'] = result['score']
|
||||
|
||||
if 'metadata' in result:
|
||||
result_object['metadata'] = result['metadata']
|
||||
|
||||
# Handle byte content if present
|
||||
if 'byteContent' in content_obj:
|
||||
result_object['byte_content'] = content_obj['byteContent']
|
||||
|
||||
# Handle row content if present
|
||||
if 'row' in content_obj:
|
||||
result_object['row_content'] = content_obj['row']
|
||||
|
||||
return result_object
|
||||
|
||||
def _run(self, query: str) -> str:
    """Query the Amazon Bedrock knowledge base and return matching passages.

    Builds a `retrieve` request from the tool's configured attributes
    (`knowledge_base_id`, optional `retrieval_configuration`,
    `guardrail_configuration` and `next_token`), invokes the
    ``bedrock-agent-runtime`` API, and serializes the processed results as a
    JSON string.

    Args:
        query: Natural-language query text to search the knowledge base with.

    Returns:
        A JSON string containing either a ``results`` list (one processed
        entry per retrieval result) or a ``message`` when nothing matched,
        plus optional ``nextToken`` / ``guardrailAction`` fields echoed from
        the API response.

    Raises:
        ImportError: If ``boto3`` is not installed.
        BedrockKnowledgeBaseError: On any AWS client error or unexpected
            failure; the original exception is chained as ``__cause__``.
    """
    try:
        import boto3
        from botocore.exceptions import ClientError
    except ImportError:
        raise ImportError("`boto3` package not found, please run `uv add boto3`")

    try:
        # Region resolution: AWS_REGION wins, then AWS_DEFAULT_REGION, then us-east-1.
        # Credentials are picked up automatically from the environment by boto3.
        bedrock_agent_runtime = boto3.client(
            'bedrock-agent-runtime',
            region_name=os.getenv('AWS_REGION', os.getenv('AWS_DEFAULT_REGION', 'us-east-1')),
        )

        # Mandatory request parameters.
        retrieve_params = {
            'knowledgeBaseId': self.knowledge_base_id,
            'retrievalQuery': {
                'text': query
            }
        }

        # Optional parameters are only sent when configured on the tool.
        if self.retrieval_configuration:
            retrieve_params['retrievalConfiguration'] = self.retrieval_configuration

        if self.guardrail_configuration:
            retrieve_params['guardrailConfiguration'] = self.guardrail_configuration

        if self.next_token:
            retrieve_params['nextToken'] = self.next_token

        # Make the retrieve API call.
        response = bedrock_agent_runtime.retrieve(**retrieve_params)

        # Normalize each raw retrieval result into the tool's output shape.
        results = [
            self._process_retrieval_result(result)
            for result in response.get('retrievalResults', [])
        ]

        # Build the response object.
        response_object = {}
        if results:
            response_object["results"] = results
        else:
            response_object["message"] = "No results found for the given query."

        # Propagate pagination and guardrail metadata when present.
        if "nextToken" in response:
            response_object["nextToken"] = response["nextToken"]

        if "guardrailAction" in response:
            response_object["guardrailAction"] = response["guardrailAction"]

        return json.dumps(response_object, indent=2)

    except ClientError as e:
        error_code = "Unknown"
        error_message = str(e)

        # Extract the structured AWS error code/message when available.
        if hasattr(e, 'response') and 'Error' in e.response:
            error_code = e.response['Error'].get('Code', 'Unknown')
            error_message = e.response['Error'].get('Message', str(e))

        # Chain the original exception so the AWS traceback is preserved.
        raise BedrockKnowledgeBaseError(f"Error ({error_code}): {error_message}") from e
    except Exception as e:
        raise BedrockKnowledgeBaseError(f"Unexpected error: {str(e)}") from e
|
||||
52
crewai_tools/aws/s3/README.md
Normal file
52
crewai_tools/aws/s3/README.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# AWS S3 Tools
|
||||
|
||||
## Description
|
||||
|
||||
These tools provide a way to interact with Amazon S3, a cloud storage service.
|
||||
|
||||
## Installation
|
||||
|
||||
Install the crewai_tools package
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## AWS Connectivity
|
||||
|
||||
The tools use `boto3` to connect to AWS S3.
|
||||
You can configure your environment to use AWS IAM roles; see the [AWS IAM Roles documentation](https://docs.aws.amazon.com/sdk-for-python/v1/developer-guide/iam-roles.html#creating-an-iam-role).
|
||||
|
||||
Set the following environment variables:
|
||||
|
||||
- `CREW_AWS_REGION`
|
||||
- `CREW_AWS_ACCESS_KEY_ID`
|
||||
- `CREW_AWS_SEC_ACCESS_KEY`
|
||||
|
||||
## Usage
|
||||
|
||||
To use the AWS S3 tools in your CrewAI agents, import the necessary tools and include them in your agent's configuration:
|
||||
|
||||
```python
|
||||
from crewai_tools.aws.s3 import S3ReaderTool, S3WriterTool
|
||||
|
||||
# For reading from S3
|
||||
@agent
|
||||
def file_retriever(self) -> Agent:
|
||||
return Agent(
|
||||
config=self.agents_config['file_retriever'],
|
||||
verbose=True,
|
||||
tools=[S3ReaderTool()]
|
||||
)
|
||||
|
||||
# For writing to S3
|
||||
@agent
|
||||
def file_uploader(self) -> Agent:
|
||||
return Agent(
|
||||
config=self.agents_config['file_uploader'],
|
||||
verbose=True,
|
||||
tools=[S3WriterTool()]
|
||||
)
|
||||
```
|
||||
|
||||
These tools can be used to read from and write to S3 buckets within your CrewAI workflows. Make sure you have properly configured your AWS credentials as mentioned in the AWS Connectivity section above.
|
||||
2
crewai_tools/aws/s3/__init__.py
Normal file
2
crewai_tools/aws/s3/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from .reader_tool import S3ReaderTool
|
||||
from .writer_tool import S3WriterTool
|
||||
47
crewai_tools/aws/s3/reader_tool.py
Normal file
47
crewai_tools/aws/s3/reader_tool.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from typing import Any, Type, List
|
||||
import os
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class S3ReaderToolInput(BaseModel):
    """Input schema for S3ReaderTool."""

    # Full S3 URI of the object to read; must contain both a bucket and a key.
    file_path: str = Field(..., description="S3 file path (e.g., 's3://bucket-name/file-name')")
||||
|
||||
class S3ReaderTool(BaseTool):
    """CrewAI tool that reads a UTF-8 text object from Amazon S3.

    Credentials and region are taken from the ``CREW_AWS_REGION``,
    ``CREW_AWS_ACCESS_KEY_ID`` and ``CREW_AWS_SEC_ACCESS_KEY`` environment
    variables.
    """

    name: str = "S3 Reader Tool"
    description: str = "Reads a file from Amazon S3 given an S3 file path"
    args_schema: Type[BaseModel] = S3ReaderToolInput
    package_dependencies: List[str] = ["boto3"]

    def _run(self, file_path: str) -> str:
        """Fetch the object at *file_path* and return its UTF-8 decoded body.

        Args:
            file_path: S3 URI in the form ``s3://bucket-name/object-key``.

        Returns:
            The file content on success, or a human-readable error string on
            failure (invalid path or AWS client error).

        Raises:
            ImportError: If ``boto3`` is not installed.
        """
        try:
            import boto3
            from botocore.exceptions import ClientError
        except ImportError:
            raise ImportError("`boto3` package not found, please run `uv add boto3`")

        try:
            bucket_name, object_key = self._parse_s3_path(file_path)

            s3 = boto3.client(
                's3',
                region_name=os.getenv('CREW_AWS_REGION', 'us-east-1'),
                aws_access_key_id=os.getenv('CREW_AWS_ACCESS_KEY_ID'),
                aws_secret_access_key=os.getenv('CREW_AWS_SEC_ACCESS_KEY')
            )

            # Read file content from S3.
            response = s3.get_object(Bucket=bucket_name, Key=object_key)
            file_content = response['Body'].read().decode('utf-8')

            return file_content
        except (ValueError, ClientError) as e:
            # ValueError covers malformed S3 paths (previously an unhandled
            # IndexError); ClientError covers AWS-side failures.
            return f"Error reading file from S3: {str(e)}"

    def _parse_s3_path(self, file_path: str) -> tuple:
        """Split an ``s3://bucket/key`` URI into ``(bucket, key)``.

        Raises:
            ValueError: If the path is missing the bucket or the object key
                (e.g. ``"s3://bucket-only"``), instead of crashing with an
                IndexError as before.
        """
        bucket_name, _, object_key = file_path.replace("s3://", "").partition("/")
        if not bucket_name or not object_key:
            raise ValueError(
                f"Invalid S3 path '{file_path}': expected format 's3://bucket-name/object-key'"
            )
        return bucket_name, object_key
|
||||
43
crewai_tools/aws/s3/writer_tool.py
Normal file
43
crewai_tools/aws/s3/writer_tool.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from typing import Type, List
|
||||
import os
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class S3WriterToolInput(BaseModel):
    """Input schema for S3WriterTool."""

    # Full S3 URI of the destination object; must contain both a bucket and a key.
    file_path: str = Field(..., description="S3 file path (e.g., 's3://bucket-name/file-name')")
    # Text body to store; written UTF-8 encoded.
    content: str = Field(..., description="Content to write to the file")
|
||||
|
||||
class S3WriterTool(BaseTool):
    """CrewAI tool that writes UTF-8 text content to an Amazon S3 object.

    Credentials and region are taken from the ``CREW_AWS_REGION``,
    ``CREW_AWS_ACCESS_KEY_ID`` and ``CREW_AWS_SEC_ACCESS_KEY`` environment
    variables.
    """

    name: str = "S3 Writer Tool"
    description: str = "Writes content to a file in Amazon S3 given an S3 file path"
    args_schema: Type[BaseModel] = S3WriterToolInput
    package_dependencies: List[str] = ["boto3"]

    def _run(self, file_path: str, content: str) -> str:
        """Write *content* to the object at *file_path*.

        Args:
            file_path: S3 URI in the form ``s3://bucket-name/object-key``.
            content: Text to store; encoded as UTF-8 before upload.

        Returns:
            A success message, or a human-readable error string on failure
            (invalid path or AWS client error).

        Raises:
            ImportError: If ``boto3`` is not installed.
        """
        try:
            import boto3
            from botocore.exceptions import ClientError
        except ImportError:
            raise ImportError("`boto3` package not found, please run `uv add boto3`")

        try:
            bucket_name, object_key = self._parse_s3_path(file_path)

            s3 = boto3.client(
                's3',
                region_name=os.getenv('CREW_AWS_REGION', 'us-east-1'),
                aws_access_key_id=os.getenv('CREW_AWS_ACCESS_KEY_ID'),
                aws_secret_access_key=os.getenv('CREW_AWS_SEC_ACCESS_KEY')
            )

            s3.put_object(Bucket=bucket_name, Key=object_key, Body=content.encode('utf-8'))
            return f"Successfully wrote content to {file_path}"
        except (ValueError, ClientError) as e:
            # ValueError covers malformed S3 paths (previously an unhandled
            # IndexError); ClientError covers AWS-side failures.
            return f"Error writing file to S3: {str(e)}"

    def _parse_s3_path(self, file_path: str) -> tuple:
        """Split an ``s3://bucket/key`` URI into ``(bucket, key)``.

        Raises:
            ValueError: If the path is missing the bucket or the object key
                (e.g. ``"s3://bucket-only"``), instead of crashing with an
                IndexError as before.
        """
        bucket_name, _, object_key = file_path.replace("s3://", "").partition("/")
        if not bucket_name or not object_key:
            raise ValueError(
                f"Invalid S3 path '{file_path}': expected format 's3://bucket-name/object-key'"
            )
        return bucket_name, object_key
|
||||
Reference in New Issue
Block a user