mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-01 15:22:37 +00:00
Squashed 'packages/tools/' content from commit 78317b9c
git-subtree-dir: packages/tools git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
This commit is contained in:
127
crewai_tools/tools/__init__.py
Normal file
127
crewai_tools/tools/__init__.py
Normal file
@@ -0,0 +1,127 @@
|
||||
from .ai_mind_tool.ai_mind_tool import AIMindTool
|
||||
from .apify_actors_tool.apify_actors_tool import ApifyActorsTool
|
||||
from .arxiv_paper_tool.arxiv_paper_tool import ArxivPaperTool
|
||||
from .brave_search_tool.brave_search_tool import BraveSearchTool
|
||||
from .brightdata_tool import (
|
||||
BrightDataDatasetTool,
|
||||
BrightDataSearchTool,
|
||||
BrightDataWebUnlockerTool,
|
||||
)
|
||||
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
|
||||
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
|
||||
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
|
||||
from .composio_tool.composio_tool import ComposioTool
|
||||
from .contextualai_create_agent_tool.contextual_create_agent_tool import (
|
||||
ContextualAICreateAgentTool,
|
||||
)
|
||||
from .contextualai_parse_tool.contextual_parse_tool import ContextualAIParseTool
|
||||
from .contextualai_query_tool.contextual_query_tool import ContextualAIQueryTool
|
||||
from .contextualai_rerank_tool.contextual_rerank_tool import ContextualAIRerankTool
|
||||
from .couchbase_tool.couchbase_tool import CouchbaseFTSVectorSearchTool
|
||||
from .crewai_enterprise_tools.crewai_enterprise_tools import CrewaiEnterpriseTools
|
||||
from .crewai_platform_tools.crewai_platform_tools import CrewaiPlatformTools
|
||||
from .csv_search_tool.csv_search_tool import CSVSearchTool
|
||||
from .dalle_tool.dalle_tool import DallETool
|
||||
from .databricks_query_tool.databricks_query_tool import DatabricksQueryTool
|
||||
from .directory_read_tool.directory_read_tool import DirectoryReadTool
|
||||
from .directory_search_tool.directory_search_tool import DirectorySearchTool
|
||||
from .docx_search_tool.docx_search_tool import DOCXSearchTool
|
||||
from .exa_tools.exa_search_tool import EXASearchTool
|
||||
from .file_read_tool.file_read_tool import FileReadTool
|
||||
from .file_writer_tool.file_writer_tool import FileWriterTool
|
||||
from .files_compressor_tool.files_compressor_tool import FileCompressorTool
|
||||
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
|
||||
FirecrawlCrawlWebsiteTool,
|
||||
)
|
||||
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
|
||||
FirecrawlScrapeWebsiteTool,
|
||||
)
|
||||
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
|
||||
from .generate_crewai_automation_tool.generate_crewai_automation_tool import (
|
||||
GenerateCrewaiAutomationTool,
|
||||
)
|
||||
from .github_search_tool.github_search_tool import GithubSearchTool
|
||||
from .hyperbrowser_load_tool.hyperbrowser_load_tool import HyperbrowserLoadTool
|
||||
from .invoke_crewai_automation_tool.invoke_crewai_automation_tool import (
|
||||
InvokeCrewAIAutomationTool,
|
||||
)
|
||||
from .json_search_tool.json_search_tool import JSONSearchTool
|
||||
from .linkup.linkup_search_tool import LinkupSearchTool
|
||||
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
|
||||
from .mdx_search_tool.mdx_search_tool import MDXSearchTool
|
||||
from .mongodb_vector_search_tool import (
|
||||
MongoDBToolSchema,
|
||||
MongoDBVectorSearchConfig,
|
||||
MongoDBVectorSearchTool,
|
||||
)
|
||||
from .multion_tool.multion_tool import MultiOnTool
|
||||
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
|
||||
from .nl2sql.nl2sql_tool import NL2SQLTool
|
||||
from .ocr_tool.ocr_tool import OCRTool
|
||||
from .oxylabs_amazon_product_scraper_tool.oxylabs_amazon_product_scraper_tool import (
|
||||
OxylabsAmazonProductScraperTool,
|
||||
)
|
||||
from .oxylabs_amazon_search_scraper_tool.oxylabs_amazon_search_scraper_tool import (
|
||||
OxylabsAmazonSearchScraperTool,
|
||||
)
|
||||
from .oxylabs_google_search_scraper_tool.oxylabs_google_search_scraper_tool import (
|
||||
OxylabsGoogleSearchScraperTool,
|
||||
)
|
||||
from .oxylabs_universal_scraper_tool.oxylabs_universal_scraper_tool import (
|
||||
OxylabsUniversalScraperTool,
|
||||
)
|
||||
from .patronus_eval_tool import (
|
||||
PatronusEvalTool,
|
||||
PatronusLocalEvaluatorTool,
|
||||
PatronusPredefinedCriteriaEvalTool,
|
||||
)
|
||||
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
|
||||
from .pg_search_tool.pg_search_tool import PGSearchTool
|
||||
from .qdrant_vector_search_tool.qdrant_search_tool import QdrantVectorSearchTool
|
||||
from .rag.rag_tool import RagTool
|
||||
from .scrape_element_from_website.scrape_element_from_website import (
|
||||
ScrapeElementFromWebsiteTool,
|
||||
)
|
||||
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
|
||||
from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import (
|
||||
ScrapegraphScrapeTool,
|
||||
ScrapegraphScrapeToolSchema,
|
||||
)
|
||||
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
|
||||
ScrapflyScrapeWebsiteTool,
|
||||
)
|
||||
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
|
||||
from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool
|
||||
from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool
|
||||
from .serper_dev_tool.serper_dev_tool import SerperDevTool
|
||||
from .serper_scrape_website_tool.serper_scrape_website_tool import (
|
||||
SerperScrapeWebsiteTool,
|
||||
)
|
||||
from .serply_api_tool.serply_job_search_tool import SerplyJobSearchTool
|
||||
from .serply_api_tool.serply_news_search_tool import SerplyNewsSearchTool
|
||||
from .serply_api_tool.serply_scholar_search_tool import SerplyScholarSearchTool
|
||||
from .serply_api_tool.serply_web_search_tool import SerplyWebSearchTool
|
||||
from .serply_api_tool.serply_webpage_to_markdown_tool import SerplyWebpageToMarkdownTool
|
||||
from .singlestore_search_tool import SingleStoreSearchTool
|
||||
from .snowflake_search_tool import (
|
||||
SnowflakeConfig,
|
||||
SnowflakeSearchTool,
|
||||
SnowflakeSearchToolInput,
|
||||
)
|
||||
from .spider_tool.spider_tool import SpiderTool
|
||||
from .stagehand_tool.stagehand_tool import StagehandTool
|
||||
from .tavily_extractor_tool.tavily_extractor_tool import TavilyExtractorTool
|
||||
from .tavily_search_tool.tavily_search_tool import TavilySearchTool
|
||||
from .txt_search_tool.txt_search_tool import TXTSearchTool
|
||||
from .vision_tool.vision_tool import VisionTool
|
||||
from .weaviate_tool.vector_search import WeaviateVectorSearchTool
|
||||
from .website_search.website_search_tool import WebsiteSearchTool
|
||||
from .xml_search_tool.xml_search_tool import XMLSearchTool
|
||||
from .youtube_channel_search_tool.youtube_channel_search_tool import (
|
||||
YoutubeChannelSearchTool,
|
||||
)
|
||||
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
|
||||
from .zapier_action_tool.zapier_action_tool import ZapierActionTools
|
||||
from .parallel_tools import (
|
||||
ParallelSearchTool,
|
||||
)
|
||||
79
crewai_tools/tools/ai_mind_tool/README.md
Normal file
79
crewai_tools/tools/ai_mind_tool/README.md
Normal file
@@ -0,0 +1,79 @@
|
||||
# AIMind Tool
|
||||
|
||||
## Description
|
||||
|
||||
[Minds](https://mindsdb.com/minds) are AI systems provided by [MindsDB](https://mindsdb.com/) that work similarly to large language models (LLMs) but go beyond by answering any question from any data.
|
||||
|
||||
This is accomplished by selecting the most relevant data for an answer using parametric search, understanding the meaning and providing responses within the correct context through semantic search, and finally, delivering precise answers by analyzing data and using machine learning (ML) models.
|
||||
|
||||
The `AIMindTool` can be used to query data sources in natural language by simply configuring their connection parameters.
|
||||
|
||||
## Installation
|
||||
|
||||
1. Install the `crewai[tools]` package:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
2. Install the Minds SDK:
|
||||
|
||||
```shell
|
||||
pip install minds-sdk
|
||||
```
|
||||
|
||||
3. Sign up for a Minds account [here](https://mdb.ai/register), and obtain an API key.
|
||||
|
||||
4. Set the Minds API key in an environment variable named `MINDS_API_KEY`.
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from crewai_tools import AIMindTool
|
||||
|
||||
|
||||
# Initialize the AIMindTool.
|
||||
aimind_tool = AIMindTool(
|
||||
datasources=[
|
||||
{
|
||||
"description": "house sales data",
|
||||
"engine": "postgres",
|
||||
"connection_data": {
|
||||
"user": "demo_user",
|
||||
"password": "demo_password",
|
||||
"host": "samples.mindsdb.com",
|
||||
"port": 5432,
|
||||
"database": "demo",
|
||||
"schema": "demo_data"
|
||||
},
|
||||
"tables": ["house_sales"]
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
aimind_tool.run("How many 3 bedroom houses were sold in 2008?")
|
||||
```
|
||||
|
||||
The `datasources` parameter is a list of dictionaries, each containing the following keys:
|
||||
|
||||
- `description`: A description of the data contained in the datasource.
|
||||
- `engine`: The engine (or type) of the datasource. Find a list of supported engines in the link below.
|
||||
- `connection_data`: A dictionary containing the connection parameters for the datasource. Find a list of connection parameters for each engine in the link below.
|
||||
- `tables`: A list of tables that the data source will use. This is optional and can be omitted if all tables in the data source are to be used.
|
||||
|
||||
A list of supported data sources and their connection parameters can be found [here](https://docs.mdb.ai/docs/data_sources).
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai.project import agent
|
||||
|
||||
|
||||
# Define an agent with the AIMindTool.
|
||||
@agent
|
||||
def researcher(self) -> Agent:
|
||||
return Agent(
|
||||
config=self.agents_config["researcher"],
|
||||
allow_delegation=False,
|
||||
tools=[aimind_tool]
|
||||
)
|
||||
```
|
||||
0
crewai_tools/tools/ai_mind_tool/__init__.py
Normal file
0
crewai_tools/tools/ai_mind_tool/__init__.py
Normal file
91
crewai_tools/tools/ai_mind_tool/ai_mind_tool.py
Normal file
91
crewai_tools/tools/ai_mind_tool/ai_mind_tool.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import os
|
||||
import secrets
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class AIMindToolConstants:
    """Fixed values shared by the AIMind tool."""

    # Base URL handed to the OpenAI-compatible client when querying a Mind.
    MINDS_API_BASE_URL = "https://mdb.ai/"

    # Prefix for the randomly generated name of each Mind.
    MIND_NAME_PREFIX = "crwai_mind_"

    # Prefix for the randomly generated name of each datasource.
    DATASOURCE_NAME_PREFIX = "crwai_ds_"
|
||||
|
||||
|
||||
class AIMindToolInputSchema(BaseModel):
    """Input for AIMind Tool."""

    # The single argument the tool accepts: the user's question as free text.
    query: str = Field(
        description="Question in natural language to ask the AI-Mind",
    )
|
||||
|
||||
|
||||
class AIMindTool(BaseTool):
    """Tool that answers natural-language questions through an AI-Mind.

    On construction the tool converts the configured ``datasources`` into
    ``DatabaseConfig`` objects, creates a randomly named Mind over them via
    the Minds SDK client, and remembers the Mind's name. Each ``_run`` call
    then sends the question to that Mind through an OpenAI client pointed at
    the Minds API base URL.

    Args:
        api_key: Minds API key. When omitted, the ``MINDS_API_KEY``
            environment variable is used instead.
        **kwargs: Remaining tool fields (for example ``datasources``),
            forwarded to ``BaseTool``.

    Raises:
        ValueError: If no API key is supplied and ``MINDS_API_KEY`` is unset.
        ImportError: If the ``minds-sdk`` package cannot be imported.
    """

    name: str = "AIMind Tool"
    description: str = (
        "A wrapper around [AI-Minds](https://mindsdb.com/minds). "
        "Useful for when you need answers to questions from your data, stored in "
        "data sources including PostgreSQL, MySQL, MariaDB, ClickHouse, Snowflake "
        "and Google BigQuery. "
        "Input should be a question in natural language."
    )
    args_schema: Type[BaseModel] = AIMindToolInputSchema
    api_key: Optional[str] = None
    # Each entry is a dict with "engine", "description" and "connection_data"
    # keys, plus an optional "tables" list.
    datasources: Optional[List[Dict[str, Any]]] = None
    # Filled in by __init__ with the name of the Mind that was created.
    mind_name: Optional[str] = None
    package_dependencies: List[str] = ["minds-sdk"]
    env_vars: List[EnvVar] = [
        EnvVar(name="MINDS_API_KEY", description="API key for AI-Minds", required=True),
    ]

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        self.api_key = api_key or os.getenv("MINDS_API_KEY")
        if not self.api_key:
            raise ValueError("API key must be provided either through constructor or MINDS_API_KEY environment variable")

        try:
            from minds.client import Client  # type: ignore
            from minds.datasources import DatabaseConfig  # type: ignore
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "`minds_sdk` package not found, please run `pip install minds-sdk`"
            ) from exc

        minds_client = Client(api_key=self.api_key)

        # Convert the datasources to DatabaseConfig objects.
        # `datasources` defaults to None, so treat that as "no datasources"
        # instead of failing with a TypeError while iterating.
        datasources = []
        for datasource in self.datasources or []:
            config_kwargs = {
                "name": f"{AIMindToolConstants.DATASOURCE_NAME_PREFIX}_{secrets.token_hex(5)}",
                "engine": datasource["engine"],
                "description": datasource["description"],
                "connection_data": datasource["connection_data"],
            }
            # "tables" is documented as optional; only forward it when the
            # caller supplied it so the SDK's own default applies otherwise.
            if "tables" in datasource:
                config_kwargs["tables"] = datasource["tables"]
            datasources.append(DatabaseConfig(**config_kwargs))

        # Generate a random name for the Mind.
        name = f"{AIMindToolConstants.MIND_NAME_PREFIX}_{secrets.token_hex(5)}"

        # NOTE(review): `replace=True` presumably overwrites an existing Mind
        # with the same name - confirm against the Minds SDK.
        mind = minds_client.minds.create(
            name=name, datasources=datasources, replace=True
        )

        self.mind_name = mind.name

    def _run(
        self,
        query: str
    ):
        """Send ``query`` to the Mind and return the content of its first reply.

        Args:
            query: Question in natural language.

        Returns:
            The ``content`` of the first choice's message in the completion.
        """
        # Run the query on the AI-Mind.
        # The Minds API is OpenAI compatible and therefore, the OpenAI client can be used.
        openai_client = OpenAI(base_url=AIMindToolConstants.MINDS_API_BASE_URL, api_key=self.api_key)

        completion = openai_client.chat.completions.create(
            model=self.mind_name,
            messages=[{"role": "user", "content": query}],
            stream=False,
        )

        return completion.choices[0].message.content
|
||||
96
crewai_tools/tools/apify_actors_tool/README.md
Normal file
96
crewai_tools/tools/apify_actors_tool/README.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# ApifyActorsTool
|
||||
|
||||
Integrate [Apify Actors](https://apify.com/actors) into your CrewAI workflows.
|
||||
|
||||
## Description
|
||||
|
||||
The `ApifyActorsTool` connects [Apify Actors](https://apify.com/actors), cloud-based programs for web scraping and automation, to your CrewAI workflows.
|
||||
Use any of the 4,000+ Actors on [Apify Store](https://apify.com/store) for use cases such as extracting data from social media, search engines, online maps, e-commerce sites, travel portals, or general websites.
|
||||
|
||||
For details, see the [Apify CrewAI integration](https://docs.apify.com/platform/integrations/crewai) in Apify documentation.
|
||||
|
||||
## Installation
|
||||
|
||||
To use `ApifyActorsTool`, install the necessary packages and set up your Apify API token. Follow the [Apify API documentation](https://docs.apify.com/platform/integrations/api) for steps to obtain the token.
|
||||
|
||||
### Steps
|
||||
|
||||
1. **Install dependencies**
|
||||
Install `crewai[tools]` and `langchain-apify`:
|
||||
```bash
|
||||
pip install 'crewai[tools]' langchain-apify
|
||||
```
|
||||
|
||||
2. **Set your API token**
|
||||
Export the token as an environment variable:
|
||||
```bash
|
||||
export APIFY_API_TOKEN='your-api-token-here'
|
||||
```
|
||||
|
||||
## Usage example
|
||||
|
||||
Use the `ApifyActorsTool` manually to run the [RAG Web Browser Actor](https://apify.com/apify/rag-web-browser) to perform a web search:
|
||||
|
||||
```python
|
||||
from crewai_tools import ApifyActorsTool
|
||||
|
||||
# Initialize the tool with an Apify Actor
|
||||
tool = ApifyActorsTool(actor_name="apify/rag-web-browser")
|
||||
|
||||
# Run the tool with input parameters
|
||||
results = tool.run(run_input={"query": "What is CrewAI?", "maxResults": 5})
|
||||
|
||||
# Process the results
|
||||
for result in results:
|
||||
print(f"URL: {result['metadata']['url']}")
|
||||
print(f"Content: {result.get('markdown', 'N/A')[:100]}...")
|
||||
```
|
||||
|
||||
### Expected output
|
||||
|
||||
Here is the output from running the code above:
|
||||
|
||||
```text
|
||||
URL: https://www.example.com/crewai-intro
|
||||
Content: CrewAI is a framework for building AI-powered workflows...
|
||||
URL: https://docs.crewai.com/
|
||||
Content: Official documentation for CrewAI...
|
||||
```
|
||||
|
||||
The `ApifyActorsTool` automatically fetches the Actor definition and input schema from Apify using the provided `actor_name` and then constructs the tool description and argument schema. This means you need to specify only a valid `actor_name`, and the tool handles the rest when used with agents—no need to specify the `run_input`. Here's how it works:
|
||||
|
||||
```python
|
||||
from crewai import Agent
|
||||
from crewai_tools import ApifyActorsTool
|
||||
|
||||
rag_browser = ApifyActorsTool(actor_name="apify/rag-web-browser")
|
||||
|
||||
agent = Agent(
|
||||
role="Research Analyst",
|
||||
goal="Find and summarize information about specific topics",
|
||||
backstory="You are an experienced researcher with attention to detail",
|
||||
tools=[rag_browser],
|
||||
)
|
||||
```
|
||||
|
||||
You can run other Actors from [Apify Store](https://apify.com/store) simply by changing the `actor_name` and, when using it manually, adjusting the `run_input` based on the Actor input schema.
|
||||
|
||||
For an example of usage with agents, see the [CrewAI Actor template](https://apify.com/templates/python-crewai).
|
||||
|
||||
## Configuration
|
||||
|
||||
The `ApifyActorsTool` requires these inputs to work:
|
||||
|
||||
- **`actor_name`**
|
||||
The ID of the Apify Actor to run, e.g., `"apify/rag-web-browser"`. Browse all Actors on [Apify Store](https://apify.com/store).
|
||||
- **`run_input`**
|
||||
A dictionary of input parameters for the Actor when running the tool manually.
|
||||
- For example, for the `apify/rag-web-browser` Actor: `{"query": "search term", "maxResults": 5}`
|
||||
- See the Actor's [input schema](https://apify.com/apify/rag-web-browser/input-schema) for the list of input parameters.
|
||||
|
||||
## Resources
|
||||
|
||||
- **[Apify](https://apify.com/)**: Explore the Apify platform.
|
||||
- **[How to build an AI agent on Apify](https://blog.apify.com/how-to-build-an-ai-agent/)** - A complete step-by-step guide to creating, publishing, and monetizing AI agents on the Apify platform.
|
||||
- **[RAG Web Browser Actor](https://apify.com/apify/rag-web-browser)**: A popular Actor for web search for LLMs.
|
||||
- **[CrewAI Integration Guide](https://docs.apify.com/platform/integrations/crewai)**: Follow the official guide for integrating Apify and CrewAI.
|
||||
96
crewai_tools/tools/apify_actors_tool/apify_actors_tool.py
Normal file
96
crewai_tools/tools/apify_actors_tool/apify_actors_tool.py
Normal file
@@ -0,0 +1,96 @@
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import Field
|
||||
from typing import TYPE_CHECKING, Any, Dict, List
|
||||
import os
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_apify import ApifyActorsTool as _ApifyActorsTool
|
||||
|
||||
class ApifyActorsTool(BaseTool):
    """Tool that runs Apify Actors.

    To use, you should have the environment variable `APIFY_API_TOKEN` set
    with your API key.

    For details, see https://docs.apify.com/platform/integrations/crewai

    The tool's name, description and argument schema are copied from the
    underlying `langchain_apify` Actor tool created for `actor_name`.

    Args:
        actor_name (str): The name of the Apify Actor to run.
        *args: Variable length argument list passed to BaseTool.
        **kwargs: Arbitrary keyword arguments passed to BaseTool.

    Returns:
        List[Dict[str, Any]]: Results from the Actor execution.

    Raises:
        ValueError: If `APIFY_API_TOKEN` is not set.
        ImportError: If `langchain_apify` package is not installed.
        RuntimeError: If running the Actor fails.

    Example:
        .. code-block:: python
            from crewai_tools import ApifyActorsTool

            tool = ApifyActorsTool(actor_name="apify/rag-web-browser")

            results = tool.run(run_input={"query": "What is CrewAI?", "maxResults": 5})
            for result in results:
                print(f"URL: {result['metadata']['url']}")
                print(f"Content: {result.get('markdown', 'N/A')[:100]}...")
    """

    env_vars: List[EnvVar] = [
        EnvVar(name="APIFY_API_TOKEN", description="API token for Apify platform access", required=True),
    ]
    # The wrapped langchain_apify tool; _run delegates to it.
    actor_tool: '_ApifyActorsTool' = Field(description="Apify Actor Tool")
    package_dependencies: List[str] = ["langchain-apify"]

    def __init__(
        self,
        actor_name: str,
        *args: Any,
        **kwargs: Any
    ) -> None:
        # Fail early with setup instructions when the token is missing.
        if not os.environ.get("APIFY_API_TOKEN"):
            msg = (
                "APIFY_API_TOKEN environment variable is not set. "
                "Please set it to your API key, to learn how to get it, "
                "see https://docs.apify.com/platform/integrations/api"
            )
            raise ValueError(msg)

        try:
            from langchain_apify import ApifyActorsTool as _ApifyActorsTool
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "Could not import langchain_apify python package. "
                "Please install it with `pip install langchain-apify` or `uv add langchain-apify`."
            ) from exc
        actor_tool = _ApifyActorsTool(actor_name)

        # Mirror the wrapped tool's metadata onto this tool's fields.
        kwargs.update(
            {
                "name": actor_tool.name,
                "description": actor_tool.description,
                "args_schema": actor_tool.args_schema,
                "actor_tool": actor_tool,
            }
        )
        super().__init__(*args, **kwargs)

    def _run(self, run_input: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the Actor tool with the given input.

        Args:
            run_input: Input parameters passed through to the wrapped Actor tool.

        Returns:
            List[Dict[str, Any]]: Results from the Actor execution.

        Raises:
            RuntimeError: If the wrapped Actor tool raises any exception; the
                original exception is attached as the cause.
        """
        try:
            return self.actor_tool._run(run_input)
        except Exception as e:
            msg = (
                f'Failed to run ApifyActorsTool {self.name}. '
                'Please check your Apify account Actor run logs for more details. '
                f'Error: {e}'
            )
            raise RuntimeError(msg) from e
|
||||
80
crewai_tools/tools/arxiv_paper_tool/Examples.md
Normal file
80
crewai_tools/tools/arxiv_paper_tool/Examples.md
Normal file
@@ -0,0 +1,80 @@
|
||||
### Example 1: Fetching Research Papers from arXiv with CrewAI
|
||||
|
||||
This example demonstrates how to build a simple CrewAI workflow that automatically searches for and downloads academic papers from [arXiv.org](https://arxiv.org). The setup uses:
|
||||
|
||||
* A custom `ArxivPaperTool` to fetch metadata and download PDFs
|
||||
* A single `Agent` tasked with locating relevant papers based on a given research topic
|
||||
* A `Task` to define the data retrieval and download process
|
||||
* A sequential `Crew` to orchestrate execution
|
||||
|
||||
The downloaded PDFs are saved to a local directory (`./DOWNLOADS`). Filenames are optionally based on sanitized paper titles, ensuring compatibility with your operating system.
|
||||
|
||||
> The saved PDFs can be further used in **downstream tasks**, such as:
|
||||
>
|
||||
> * **RAG (Retrieval-Augmented Generation)**
|
||||
> * **Summarization**
|
||||
> * **Citation extraction**
|
||||
> * **Embedding-based search or analysis**
|
||||
|
||||
---
|
||||
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew, Process, LLM
|
||||
from crewai_tools import ArxivPaperTool
|
||||
|
||||
|
||||
|
||||
llm = LLM(
|
||||
model="ollama/llama3.1",
|
||||
base_url="http://localhost:11434",
|
||||
temperature=0.1
|
||||
)
|
||||
|
||||
|
||||
topic = "Crew AI"
|
||||
max_results = 3
|
||||
save_dir = "./DOWNLOADS"
|
||||
use_title_as_filename = True
|
||||
|
||||
tool = ArxivPaperTool(
|
||||
download_pdfs=True,
|
||||
save_dir=save_dir,
|
||||
use_title_as_filename=True
|
||||
)
|
||||
tool.result_as_answer = True #Required,otherwise
|
||||
|
||||
|
||||
arxiv_paper_fetch = Agent(
|
||||
role="Arxiv Data Fetcher",
|
||||
goal=f"Retrieve relevant papers from arXiv based on a research topic {topic} and maximum number of papers to be downloaded is{max_results},try to use title as filename {use_title_as_filename} and download PDFs to {save_dir},",
|
||||
backstory="An expert in scientific data retrieval, skilled in extracting academic content from arXiv.",
|
||||
# tools=[ArxivPaperTool()],
|
||||
llm=llm,
|
||||
verbose=True,
|
||||
allow_delegation=False
|
||||
)
|
||||
fetch_task = Task(
|
||||
description=(
|
||||
f"Search arXiv for the topic '{topic}' and fetch up to {max_results} papers. "
|
||||
f"Download PDFs for analysis and store them at {save_dir}."
|
||||
),
|
||||
expected_output="PDFs saved to disk for downstream agents.",
|
||||
agent=arxiv_paper_fetch,
|
||||
tools=[tool], # Use the actual tool instance here
|
||||
|
||||
)
|
||||
|
||||
|
||||
pdf_qa_crew = Crew(
|
||||
agents=[arxiv_paper_fetch],
|
||||
tasks=[fetch_task],
|
||||
process=Process.sequential,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
|
||||
result = pdf_qa_crew.kickoff()
|
||||
|
||||
print(f"\n🤖 Answer:\n\n{result.raw}\n")
|
||||
```
|
||||
142
crewai_tools/tools/arxiv_paper_tool/README.md
Normal file
142
crewai_tools/tools/arxiv_paper_tool/README.md
Normal file
@@ -0,0 +1,142 @@
|
||||
# ArxivPaperTool
|
||||
|
||||
|
||||
# 📚 ArxivPaperTool
|
||||
|
||||
The **ArxivPaperTool** is a utility for fetching metadata and optionally downloading PDFs of academic papers from the [arXiv](https://arxiv.org) platform using its public API. It supports configurable queries, batch retrieval, PDF downloading, and clean formatting for summaries and metadata. This tool is particularly useful for researchers, students, academic agents, and AI tools performing automated literature reviews.
|
||||
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
This tool:
|
||||
|
||||
* Accepts a **search query** and retrieves a list of papers from arXiv.
|
||||
* Allows configuration of the **maximum number of results** to fetch.
|
||||
* Optionally downloads the **PDFs** of the matched papers.
|
||||
* Lets you specify whether to name PDF files using the **arXiv ID** or **paper title**.
|
||||
* Saves downloaded files into a **custom or default directory**.
|
||||
* Returns structured summaries of all fetched papers including metadata.
|
||||
|
||||
---
|
||||
|
||||
## Arguments
|
||||
|
||||
| Argument | Type | Required | Description |
|
||||
| ----------------------- | ------ | -------- | --------------------------------------------------------------------------------- |
|
||||
| `search_query` | `str` | ✅ | Search query string (e.g., `"transformer neural network"`). |
|
||||
| `max_results` | `int` | ✅ | Number of results to fetch (between 1 and 100). |
|
||||
| `download_pdfs` | `bool` | ❌ | Whether to download the corresponding PDFs. Defaults to `False`. |
|
||||
| `save_dir` | `str` | ❌ | Directory to save PDFs (created if it doesn’t exist). Defaults to `./arxiv_pdfs`. |
|
||||
| `use_title_as_filename` | `bool` | ❌ | Use the paper title as the filename (sanitized). Defaults to `False`. |
|
||||
|
||||
---
|
||||
|
||||
## 📄 `ArxivPaperTool` Usage Examples
|
||||
|
||||
This document shows how to use the `ArxivPaperTool` to fetch research paper metadata from arXiv and optionally download PDFs.
|
||||
|
||||
### 🔧 Tool Initialization
|
||||
|
||||
```python
|
||||
from crewai_tools import ArxivPaperTool
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 1: Fetch Metadata Only (No Downloads)
|
||||
|
||||
```python
|
||||
tool = ArxivPaperTool()
|
||||
result = tool._run(
|
||||
search_query="deep learning",
|
||||
max_results=1
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 2: Fetch and Download PDFs (arXiv ID as Filename)
|
||||
|
||||
```python
|
||||
tool = ArxivPaperTool(download_pdfs=True)
|
||||
result = tool._run(
|
||||
search_query="transformer models",
|
||||
max_results=2
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 3: Download PDFs into a Custom Directory
|
||||
|
||||
```python
|
||||
tool = ArxivPaperTool(
|
||||
download_pdfs=True,
|
||||
save_dir="./my_papers"
|
||||
)
|
||||
result = tool._run(
|
||||
search_query="graph neural networks",
|
||||
max_results=2
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 4: Use Paper Titles as Filenames
|
||||
|
||||
```python
|
||||
tool = ArxivPaperTool(
|
||||
download_pdfs=True,
|
||||
use_title_as_filename=True
|
||||
)
|
||||
result = tool._run(
|
||||
search_query="vision transformers",
|
||||
max_results=1
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Example 5: All Options Combined
|
||||
|
||||
```python
|
||||
tool = ArxivPaperTool(
|
||||
download_pdfs=True,
|
||||
save_dir="./downloads",
|
||||
use_title_as_filename=True
|
||||
)
|
||||
result = tool._run(
|
||||
search_query="stable diffusion",
|
||||
max_results=3
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Run via `__main__`
|
||||
|
||||
Your file can also include:
|
||||
|
||||
```python
|
||||
if __name__ == "__main__":
|
||||
tool = ArxivPaperTool(
|
||||
download_pdfs=True,
|
||||
save_dir="./downloads2",
|
||||
use_title_as_filename=False
|
||||
)
|
||||
result = tool._run(
|
||||
search_query="deep learning",
|
||||
max_results=1
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
152
crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py
Normal file
152
crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py
Normal file
@@ -0,0 +1,152 @@
|
||||
import re
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Type, List, Optional, ClassVar
|
||||
from pydantic import BaseModel, Field
|
||||
from crewai.tools import BaseTool,EnvVar
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__file__)
|
||||
|
||||
class ArxivToolInput(BaseModel):
    """Arguments accepted by the arXiv paper tool."""

    # Required free-text query.
    search_query: str = Field(
        ...,
        description="Search query for Arxiv, e.g., 'transformer neural network'",
    )
    # Defaults to 5; the ge/le bounds restrict it to 1..100.
    max_results: int = Field(
        5,
        ge=1,
        le=100,
        description="Max results to fetch; must be between 1 and 100",
    )
|
||||
|
||||
class ArxivPaperTool(BaseTool):
|
||||
BASE_API_URL: ClassVar[str] = "http://export.arxiv.org/api/query"
|
||||
SLEEP_DURATION: ClassVar[int] = 1
|
||||
SUMMARY_TRUNCATE_LENGTH: ClassVar[int] = 300
|
||||
ATOM_NAMESPACE: ClassVar[str] = "{http://www.w3.org/2005/Atom}"
|
||||
REQUEST_TIMEOUT: ClassVar[int] = 10
|
||||
name: str = "Arxiv Paper Fetcher and Downloader"
|
||||
description: str = "Fetches metadata from Arxiv based on a search query and optionally downloads PDFs."
|
||||
args_schema: Type[BaseModel] = ArxivToolInput
|
||||
model_config = {"extra": "allow"}
|
||||
package_dependencies: List[str] = ["pydantic"]
|
||||
env_vars: List[EnvVar] = []
|
||||
|
||||
def __init__(self, download_pdfs=False, save_dir="./arxiv_pdfs", use_title_as_filename=False):
    """Create the tool and record its download options.

    Args:
        download_pdfs: Whether `_run` should also download each paper's PDF.
        save_dir: Directory that downloaded PDFs are written to.
        use_title_as_filename: Name PDFs after the paper title instead of
            the arXiv id.
    """
    super().__init__()
    # These are stored as plain instance attributes after base initialization.
    self.use_title_as_filename = use_title_as_filename
    self.save_dir = save_dir
    self.download_pdfs = download_pdfs
|
||||
|
||||
def _run(self, search_query: str, max_results: int = 5) -> str:
|
||||
try:
|
||||
args = ArxivToolInput(search_query=search_query, max_results=max_results)
|
||||
logger.info(f"Running Arxiv tool: query='{args.search_query}', max_results={args.max_results}, "
|
||||
f"download_pdfs={self.download_pdfs}, save_dir='{self.save_dir}', "
|
||||
f"use_title_as_filename={self.use_title_as_filename}")
|
||||
|
||||
papers = self.fetch_arxiv_data(args.search_query, args.max_results)
|
||||
|
||||
if self.download_pdfs:
|
||||
save_dir = self._validate_save_path(self.save_dir)
|
||||
for paper in papers:
|
||||
if paper['pdf_url']:
|
||||
if self.use_title_as_filename:
|
||||
safe_title = re.sub(r'[\\/*?:"<>|]', "_", paper['title']).strip()
|
||||
filename_base = safe_title or paper['arxiv_id']
|
||||
else:
|
||||
filename_base = paper['arxiv_id']
|
||||
filename = f"{filename_base[:500]}.pdf"
|
||||
save_path = Path(save_dir) / filename
|
||||
|
||||
self.download_pdf(paper['pdf_url'], save_path)
|
||||
time.sleep(self.SLEEP_DURATION)
|
||||
|
||||
results = [self._format_paper_result(p) for p in papers]
|
||||
return "\n\n" + "-" * 80 + "\n\n".join(results)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"ArxivTool Error: {str(e)}")
|
||||
return f"Failed to fetch or download Arxiv papers: {str(e)}"
|
||||
|
||||
|
||||
def fetch_arxiv_data(self, search_query: str, max_results: int) -> List[dict]:
|
||||
api_url = f"{self.BASE_API_URL}?search_query={urllib.parse.quote(search_query)}&start=0&max_results={max_results}"
|
||||
logger.info(f"Fetching data from Arxiv API: {api_url}")
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(api_url, timeout=self.REQUEST_TIMEOUT) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f"HTTP {response.status}: {response.reason}")
|
||||
data = response.read().decode('utf-8')
|
||||
except urllib.error.URLError as e:
|
||||
logger.error(f"Error fetching data from Arxiv: {e}")
|
||||
raise
|
||||
|
||||
root = ET.fromstring(data)
|
||||
papers = []
|
||||
|
||||
for entry in root.findall(self.ATOM_NAMESPACE + "entry"):
|
||||
raw_id = self._get_element_text(entry, "id")
|
||||
arxiv_id = raw_id.split('/')[-1].replace('.', '_') if raw_id else "unknown"
|
||||
|
||||
title = self._get_element_text(entry, "title") or "No Title"
|
||||
summary = self._get_element_text(entry, "summary") or "No Summary"
|
||||
published = self._get_element_text(entry, "published") or "No Publish Date"
|
||||
authors = [
|
||||
self._get_element_text(author, "name") or "Unknown"
|
||||
for author in entry.findall(self.ATOM_NAMESPACE + "author")
|
||||
]
|
||||
|
||||
pdf_url = self._extract_pdf_url(entry)
|
||||
|
||||
papers.append({
|
||||
"arxiv_id": arxiv_id,
|
||||
"title": title,
|
||||
"summary": summary,
|
||||
"authors": authors,
|
||||
"published_date": published,
|
||||
"pdf_url": pdf_url
|
||||
})
|
||||
|
||||
return papers
|
||||
|
||||
@staticmethod
|
||||
def _get_element_text(entry: ET.Element, element_name: str) -> Optional[str]:
|
||||
elem = entry.find(f'{ArxivPaperTool.ATOM_NAMESPACE}{element_name}')
|
||||
return elem.text.strip() if elem is not None and elem.text else None
|
||||
|
||||
def _extract_pdf_url(self, entry: ET.Element) -> Optional[str]:
|
||||
for link in entry.findall(self.ATOM_NAMESPACE + "link"):
|
||||
if link.attrib.get('title', '').lower() == 'pdf':
|
||||
return link.attrib.get('href')
|
||||
for link in entry.findall(self.ATOM_NAMESPACE + "link"):
|
||||
href = link.attrib.get('href')
|
||||
if href and 'pdf' in href:
|
||||
return href
|
||||
return None
|
||||
|
||||
def _format_paper_result(self, paper: dict) -> str:
|
||||
summary = (paper['summary'][:self.SUMMARY_TRUNCATE_LENGTH] + '...') \
|
||||
if len(paper['summary']) > self.SUMMARY_TRUNCATE_LENGTH else paper['summary']
|
||||
authors_str = ', '.join(paper['authors'])
|
||||
return (f"Title: {paper['title']}\n"
|
||||
f"Authors: {authors_str}\n"
|
||||
f"Published: {paper['published_date']}\n"
|
||||
f"PDF: {paper['pdf_url'] or 'N/A'}\n"
|
||||
f"Summary: {summary}")
|
||||
|
||||
@staticmethod
|
||||
def _validate_save_path(path: str) -> Path:
|
||||
save_path = Path(path).resolve()
|
||||
save_path.mkdir(parents=True, exist_ok=True)
|
||||
return save_path
|
||||
|
||||
def download_pdf(self, pdf_url: str, save_path: str):
|
||||
try:
|
||||
logger.info(f"Downloading PDF from {pdf_url} to {save_path}")
|
||||
urllib.request.urlretrieve(pdf_url, str(save_path))
|
||||
logger.info(f"PDF saved: {save_path}")
|
||||
except urllib.error.URLError as e:
|
||||
logger.error(f"Network error occurred while downloading {pdf_url}: {e}")
|
||||
raise
|
||||
except OSError as e:
|
||||
logger.error(f"File save error for {save_path}: {e}")
|
||||
raise
|
||||
113
crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py
Normal file
113
crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import pytest
|
||||
import urllib.error
|
||||
from unittest.mock import patch, MagicMock, mock_open
|
||||
from pathlib import Path
|
||||
import xml.etree.ElementTree as ET
|
||||
from crewai_tools import ArxivPaperTool
|
||||
|
||||
@pytest.fixture
def tool():
    """Provide an ArxivPaperTool with PDF downloading switched off."""
    arxiv_tool = ArxivPaperTool(download_pdfs=False)
    return arxiv_tool
|
||||
|
||||
def mock_arxiv_response():
    """Return an Atom feed string holding a single sample arXiv entry."""
    # The entry carries an id, title, summary, publish date, one author and a
    # link titled "pdf".
    return '''<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<id>http://arxiv.org/abs/1234.5678</id>
<title>Sample Paper</title>
<summary>This is a summary of the sample paper.</summary>
<published>2022-01-01T00:00:00Z</published>
<author><name>John Doe</name></author>
<link title="pdf" href="http://arxiv.org/pdf/1234.5678.pdf"/>
</entry>
</feed>'''
|
||||
|
||||
@patch("urllib.request.urlopen")
def test_fetch_arxiv_data(mock_urlopen, tool):
    """fetch_arxiv_data turns a well-formed feed into a list of paper dicts."""
    fake_response = MagicMock(status=200)
    fake_response.read.return_value = mock_arxiv_response().encode("utf-8")
    # urlopen is used as a context manager, so wire the mock through __enter__.
    mock_urlopen.return_value.__enter__.return_value = fake_response

    papers = tool.fetch_arxiv_data("transformer", 1)

    assert isinstance(papers, list)
    assert papers[0]['title'] == "Sample Paper"
|
||||
|
||||
@patch("urllib.request.urlopen", side_effect=urllib.error.URLError("Timeout"))
def test_fetch_arxiv_data_network_error(mock_urlopen, tool):
    """A URLError from urlopen propagates out of fetch_arxiv_data."""
    expected_error = urllib.error.URLError
    with pytest.raises(expected_error):
        tool.fetch_arxiv_data("transformer", 1)
|
||||
|
||||
@patch("urllib.request.urlretrieve")
def test_download_pdf_success(mock_urlretrieve):
    """download_pdf delegates the transfer to urlretrieve exactly once."""
    destination = Path("test.pdf")
    ArxivPaperTool().download_pdf("http://arxiv.org/pdf/1234.5678.pdf", destination)
    mock_urlretrieve.assert_called_once()
|
||||
|
||||
@patch("urllib.request.urlretrieve", side_effect=OSError("Permission denied"))
def test_download_pdf_oserror(mock_urlretrieve):
    """An OSError raised while saving is re-raised by download_pdf."""
    arxiv_tool = ArxivPaperTool()
    blocked_target = Path("/restricted/test.pdf")
    with pytest.raises(OSError):
        arxiv_tool.download_pdf("http://arxiv.org/pdf/1234.5678.pdf", blocked_target)
|
||||
|
||||
@patch("urllib.request.urlopen")
@patch("urllib.request.urlretrieve")
def test_run_with_download(mock_urlretrieve, mock_urlopen, tmp_path):
    """_run with download_pdfs=True fetches metadata and downloads the PDF.

    The PDFs are directed into pytest's ``tmp_path`` and ``time.sleep`` is
    patched, so the test neither creates ``./arxiv_pdfs`` in the working
    directory nor waits for the real post-download pause.
    """
    mock_response = MagicMock()
    mock_response.status = 200
    mock_response.read.return_value = mock_arxiv_response().encode("utf-8")
    mock_urlopen.return_value.__enter__.return_value = mock_response

    tool = ArxivPaperTool(download_pdfs=True, save_dir=str(tmp_path))
    with patch("time.sleep"):
        output = tool._run("transformer", 1)

    assert "Title: Sample Paper" in output
    mock_urlretrieve.assert_called_once()
    # Second positional argument of urlretrieve is the destination path.
    saved_to = Path(mock_urlretrieve.call_args[0][1])
    assert saved_to.parent == tmp_path.resolve()
|
||||
|
||||
@patch("urllib.request.urlopen")
def test_run_no_download(mock_urlopen):
    """_run with downloads disabled still returns the formatted metadata."""
    fake_response = MagicMock(status=200)
    fake_response.read.return_value = mock_arxiv_response().encode("utf-8")
    mock_urlopen.return_value.__enter__.return_value = fake_response

    output = ArxivPaperTool(download_pdfs=False)._run("transformer", 1)

    assert "Title: Sample Paper" in output
|
||||
|
||||
@patch("pathlib.Path.mkdir")
def test_validate_save_path_creates_directory(mock_mkdir):
    """_validate_save_path creates the directory and returns a Path."""
    resolved = ArxivPaperTool._validate_save_path("new_folder")
    assert isinstance(resolved, Path)
    mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
|
||||
|
||||
@patch("urllib.request.urlopen")
def test_run_handles_exception(mock_urlopen):
    """_run reports an API failure as an error string instead of raising."""
    mock_urlopen.side_effect = Exception("API failure")
    output = ArxivPaperTool()._run("transformer", 1)
    assert "Failed to fetch or download Arxiv papers" in output
|
||||
|
||||
|
||||
@patch("urllib.request.urlopen")
def test_invalid_xml_response(mock_urlopen, tool):
    """Malformed XML in the response surfaces as ET.ParseError."""
    broken_feed = MagicMock(status=200)
    broken_feed.read.return_value = b"<invalid><xml>"
    mock_urlopen.return_value.__enter__.return_value = broken_feed

    with pytest.raises(ET.ParseError):
        tool.fetch_arxiv_data("quantum", 1)
|
||||
|
||||
@patch.object(ArxivPaperTool, "fetch_arxiv_data")
def test_run_with_max_results(mock_fetch, tool):
    """_run formats every paper when the maximum of 100 results comes back."""
    fake_papers = []
    for i in range(100):
        fake_papers.append({
            "arxiv_id": f"test_{i}",
            "title": f"Title {i}",
            "summary": "Summary",
            "authors": ["Author"],
            "published_date": "2023-01-01",
            "pdf_url": None,
        })
    mock_fetch.return_value = fake_papers

    output = tool._run(search_query="test", max_results=100)

    assert output.count("Title:") == 100
|
||||
30
crewai_tools/tools/brave_search_tool/README.md
Normal file
30
crewai_tools/tools/brave_search_tool/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# BraveSearchTool Documentation
|
||||
|
||||
## Description
|
||||
This tool performs a web search for a given query. It uses the Brave Web Search API, a REST API that queries Brave Search and returns search results from the web. The following sections describe how to craft requests to the Brave Web Search API, including parameters and headers, and get a JSON response back.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
The following example demonstrates how to initialize the tool and execute a search with a given query:
|
||||
|
||||
```python
|
||||
from crewai_tools import BraveSearchTool
|
||||
|
||||
# Initialize the tool for internet searching capabilities
|
||||
tool = BraveSearchTool()
|
||||
```
|
||||
|
||||
## Steps to Get Started
|
||||
To effectively use the `BraveSearchTool`, follow these steps:
|
||||
|
||||
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
|
||||
2. **API Key Acquisition**: Acquire an API key [here](https://api.search.brave.com/app/keys).
|
||||
3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool.
|
||||
|
||||
## Conclusion
|
||||
By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.
|
||||
0
crewai_tools/tools/brave_search_tool/__init__.py
Normal file
0
crewai_tools/tools/brave_search_tool/__init__.py
Normal file
121
crewai_tools/tools/brave_search_tool/brave_search_tool.py
Normal file
121
crewai_tools/tools/brave_search_tool/brave_search_tool.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import datetime
|
||||
import os
|
||||
import time
|
||||
from typing import Any, ClassVar, List, Optional, Type
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
def _save_results_to_file(content: str) -> None:
|
||||
"""Saves the search results to a file."""
|
||||
filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
|
||||
with open(filename, "w") as file:
|
||||
file.write(content)
|
||||
print(f"Results saved to {filename}")
|
||||
|
||||
|
||||
class BraveSearchToolSchema(BaseModel):
    """Input for BraveSearchTool."""

    # Required field (no default).
    search_query: str = Field(
        ..., description="Mandatory search query you want to use to search the internet"
    )
|
||||
|
||||
|
||||
class BraveSearchTool(BaseTool):
    """
    BraveSearchTool - A tool for performing web searches using the Brave Search API.

    This module provides functionality to search the internet using Brave's Search API,
    supporting customizable result counts and country-specific searches.

    The API key is read from the ``BRAVE_API_KEY`` environment variable, and
    requests are spaced at least ``_min_request_interval`` seconds apart across
    all instances (the timestamp is stored on the class).

    Dependencies:
        - requests
        - pydantic
        - python-dotenv (for API key management)
    """

    name: str = "Brave Web Search the internet"
    description: str = (
        "A tool that can be used to search the internet with a search_query."
    )
    args_schema: Type[BaseModel] = BraveSearchToolSchema
    search_url: str = "https://api.search.brave.com/res/v1/web/search"
    country: Optional[str] = ""
    n_results: int = 10
    save_file: bool = False
    _last_request_time: ClassVar[float] = 0
    _min_request_interval: ClassVar[float] = 1.0  # seconds
    # Upper bound for one HTTP request so a stalled connection cannot hang the tool.
    _request_timeout: ClassVar[float] = 30.0  # seconds
    env_vars: List[EnvVar] = [
        EnvVar(name="BRAVE_API_KEY", description="API key for Brave Search", required=True),
    ]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Fail at construction time rather than on the first search.
        if "BRAVE_API_KEY" not in os.environ:
            raise ValueError(
                "BRAVE_API_KEY environment variable is required for BraveSearchTool"
            )

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Run a Brave web search and return the formatted results.

        Keyword Args:
            search_query / query: The search text (required).
            save_file: Overrides ``self.save_file`` for this call.
            n_results: Overrides ``self.n_results`` for this call.

        Returns:
            The formatted results (empty string when the response has no
            ``"web"`` section), wrapped in a "Search results" banner when they
            were also saved to a file, or an error message string when the
            request or response parsing fails.

        Raises:
            ValueError: No search query was supplied.
        """
        # Throttle: wait out the remainder of the minimum interval since the
        # previous request made by any instance.
        current_time = time.time()
        elapsed = current_time - self._last_request_time
        if elapsed < self._min_request_interval:
            time.sleep(self._min_request_interval - elapsed)
        BraveSearchTool._last_request_time = time.time()

        search_query = kwargs.get("search_query") or kwargs.get("query")
        if not search_query:
            raise ValueError("Search query is required")

        save_file = kwargs.get("save_file", self.save_file)
        n_results = kwargs.get("n_results", self.n_results)

        payload = {"q": search_query, "count": n_results}
        # Truthiness covers both "" and None for the optional country field.
        if self.country:
            payload["country"] = self.country

        try:
            headers = {
                "X-Subscription-Token": os.environ["BRAVE_API_KEY"],
                "Accept": "application/json",
            }

            response = requests.get(
                self.search_url,
                headers=headers,
                params=payload,
                timeout=self._request_timeout,
            )
            response.raise_for_status()  # Handle non-200 responses
            results = response.json()

            # A response without a "web" section yields no hits; previously
            # `content` stayed unbound here and the code below raised
            # UnboundLocalError.
            web_results = results["web"]["results"] if "web" in results else []

            formatted = []
            for result in web_results:
                try:
                    formatted.append(
                        "\n".join(
                            [
                                f"Title: {result['title']}",
                                f"Link: {result['url']}",
                                f"Snippet: {result['description']}",
                                "---",
                            ]
                        )
                    )
                except KeyError:
                    # Skip hits that lack one of the displayed fields.
                    continue

            content = "\n".join(formatted)
        except requests.RequestException as e:
            return f"Error performing search: {str(e)}"
        except KeyError as e:
            return f"Error parsing search results: {str(e)}"

        if save_file:
            _save_results_to_file(content)
            return f"\nSearch results: {content}\n"
        return content
|
||||
79
crewai_tools/tools/brightdata_tool/README.md
Normal file
79
crewai_tools/tools/brightdata_tool/README.md
Normal file
@@ -0,0 +1,79 @@
|
||||
# BrightData Tools Documentation
|
||||
|
||||
## Description
|
||||
|
||||
A comprehensive suite of CrewAI tools that leverage Bright Data's powerful infrastructure for web scraping, data extraction, and search operations. These tools provide three distinct capabilities:
|
||||
|
||||
- **BrightDataDatasetTool**: Extract structured data from popular data feeds (Amazon, LinkedIn, Instagram, etc.) using pre-built datasets
|
||||
- **BrightDataSearchTool**: Perform web searches across multiple search engines with geo-targeting and device simulation
|
||||
- **BrightDataWebUnlockerTool**: Scrape any website content while bypassing bot protection mechanisms
|
||||
|
||||
## Installation
|
||||
|
||||
To incorporate these tools into your project, follow the installation instructions below:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]' aiohttp requests
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Dataset Tool - Extract Amazon Product Data
|
||||
```python
|
||||
from crewai_tools import BrightDataDatasetTool
|
||||
|
||||
# Initialize with specific dataset and URL
|
||||
tool = BrightDataDatasetTool(
|
||||
dataset_type="amazon_product",
|
||||
url="https://www.amazon.com/dp/B08QB1QMJ5/"
|
||||
)
|
||||
result = tool.run()
|
||||
```
|
||||
|
||||
### Search Tool - Perform Web Search
|
||||
```python
|
||||
from crewai_tools import BrightDataSearchTool
|
||||
|
||||
# Initialize with search query
|
||||
tool = BrightDataSearchTool(
|
||||
query="latest AI trends 2025",
|
||||
search_engine="google",
|
||||
country="us"
|
||||
)
|
||||
result = tool.run()
|
||||
```
|
||||
|
||||
### Web Unlocker Tool - Scrape Website Content
|
||||
```python
|
||||
from crewai_tools import BrightDataWebUnlockerTool
|
||||
|
||||
# Initialize with target URL
|
||||
tool = BrightDataWebUnlockerTool(
|
||||
url="https://example.com",
|
||||
data_format="markdown"
|
||||
)
|
||||
result = tool.run()
|
||||
```
|
||||
|
||||
## Steps to Get Started
|
||||
|
||||
To effectively use the BrightData Tools, follow these steps:
|
||||
|
||||
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
|
||||
|
||||
2. **API Key Acquisition**: Register for a Bright Data account at `https://brightdata.com/` and obtain your API credentials from your account settings.
|
||||
|
||||
3. **Environment Configuration**: Set up the required environment variables:
|
||||
```bash
|
||||
export BRIGHT_DATA_API_KEY="your_api_key_here"
|
||||
export BRIGHT_DATA_ZONE="your_zone_here"
|
||||
```
|
||||
|
||||
4. **Tool Selection**: Choose the appropriate tool based on your needs:
|
||||
- Use **DatasetTool** for structured data from supported platforms
|
||||
- Use **SearchTool** for web search operations
|
||||
- Use **WebUnlockerTool** for general website scraping
|
||||
|
||||
## Conclusion
|
||||
|
||||
By integrating BrightData Tools into your CrewAI agents, you gain access to enterprise-grade web scraping and data extraction capabilities. These tools handle complex challenges like bot protection, geo-restrictions, and data parsing, allowing you to focus on building your applications rather than managing scraping infrastructure.
|
||||
9
crewai_tools/tools/brightdata_tool/__init__.py
Normal file
9
crewai_tools/tools/brightdata_tool/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from .brightdata_dataset import BrightDataDatasetTool
|
||||
from .brightdata_serp import BrightDataSearchTool
|
||||
from .brightdata_unlocker import BrightDataWebUnlockerTool
|
||||
|
||||
# Names exported by `from <package> import *`: the three tool classes imported above.
__all__ = [
    "BrightDataDatasetTool",
    "BrightDataSearchTool",
    "BrightDataWebUnlockerTool"
]
|
||||
570
crewai_tools/tools/brightdata_tool/brightdata_dataset.py
Normal file
570
crewai_tools/tools/brightdata_tool/brightdata_dataset.py
Normal file
@@ -0,0 +1,570 @@
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Any, Dict, Optional, Type
|
||||
|
||||
import aiohttp
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Settings model holding the API base URL, the default timeout and the default
# polling interval; `from_env` builds an instance from environment variables.
class BrightDataConfig(BaseModel):
    API_URL: str = "https://api.brightdata.com"
    DEFAULT_TIMEOUT: int = 600
    DEFAULT_POLLING_INTERVAL: int = 1

    @classmethod
    def from_env(cls):
        # Each value comes from a BRIGHTDATA_* environment variable and falls
        # back to the same default as the field declarations above.
        env = os.environ
        api_url = env.get("BRIGHTDATA_API_URL", "https://api.brightdata.com")
        default_timeout = int(env.get("BRIGHTDATA_DEFAULT_TIMEOUT", "600"))
        polling_interval = int(env.get("BRIGHTDATA_DEFAULT_POLLING_INTERVAL", "1"))
        return cls(
            API_URL=api_url,
            DEFAULT_TIMEOUT=default_timeout,
            DEFAULT_POLLING_INTERVAL=polling_interval,
        )
|
||||
class BrightDataDatasetToolException(Exception):
    """Exception that carries a message together with an error code."""

    def __init__(self, message, error_code):
        # Only the message is forwarded to Exception, so `args` is (message,).
        super().__init__(message)
        self.error_code = error_code
        self.message = message

    def __str__(self):
        text, code = self.message, self.error_code
        return f"{text} (Error Code: {code})"
|
||||
|
||||
|
||||
class BrightDataDatasetToolSchema(BaseModel):
    """
    Schema for validating input parameters for the BrightDataDatasetTool.

    Attributes:
        dataset_type (str): Required Bright Data Dataset Type used to specify which dataset to access.
        format (str): Response format (json by default). Multiple formats exist - json, ndjson, jsonl, csv
        url (str): The URL from which structured data needs to be extracted.
        zipcode (Optional[str]): An optional ZIP code to narrow down the data geographically.
        additional_params (Optional[Dict]): Extra parameters for the Bright Data API call.
    """

    # Required field (no default).
    dataset_type: str = Field(..., description="The Bright Data Dataset Type")
    # Declared Optional, so an explicit None is accepted in addition to a string.
    format: Optional[str] = Field(
        default="json", description="Response format (json by default)"
    )
    # Required field (no default).
    url: str = Field(..., description="The URL to extract data from")
    zipcode: Optional[str] = Field(default=None, description="Optional zipcode")
    additional_params: Optional[Dict[str, Any]] = Field(
        default=None, description="Additional params if any"
    )
|
||||
|
||||
# Module-level settings, read from the environment once at import time.
config = BrightDataConfig.from_env()

# Convenience aliases for the two settings exposed as module globals.
BRIGHTDATA_API_URL = config.API_URL
timeout = config.DEFAULT_TIMEOUT
|
||||
|
||||
# Sentence that closes almost every dataset description.
_CACHE_NOTE = "This can be a cache lookup, so it can be more reliable than scraping"


def _dataset_entry(entry_id, dataset_id, description_lines, inputs, defaults=None):
    """Build one catalogue record in the dict layout used by ``datasets``."""
    entry = {
        "id": entry_id,
        "dataset_id": dataset_id,
        "description": "\n".join(description_lines),
        "inputs": list(inputs),
    }
    # Only entries that declare default input values carry a "defaults" key.
    if defaults is not None:
        entry["defaults"] = defaults
    return entry


# Catalogue of dataset types: each record maps a short "id" to a "dataset_id",
# a multi-line description and the list of input names.
datasets = [
    _dataset_entry(
        "amazon_product",
        "gd_l7q7dkf244hwjntr0",
        [
            "Quickly read structured amazon product data.",
            "Requires a valid product URL with /dp/ in it.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "amazon_product_reviews",
        "gd_le8e811kzy4ggddlq",
        [
            "Quickly read structured amazon product review data.",
            "Requires a valid product URL with /dp/ in it.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "amazon_product_search",
        "gd_lwdb4vjm1ehb499uxs",
        [
            "Quickly read structured amazon product search data.",
            "Requires a valid search keyword and amazon domain URL.",
            _CACHE_NOTE,
        ],
        ["keyword", "url", "pages_to_search"],
        defaults={"pages_to_search": "1"},
    ),
    _dataset_entry(
        "walmart_product",
        "gd_l95fol7l1ru6rlo116",
        [
            "Quickly read structured walmart product data.",
            "Requires a valid product URL with /ip/ in it.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "walmart_seller",
        "gd_m7ke48w81ocyu4hhz0",
        [
            "Quickly read structured walmart seller data.",
            "Requires a valid walmart seller URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "ebay_product",
        "gd_ltr9mjt81n0zzdk1fb",
        [
            "Quickly read structured ebay product data.",
            "Requires a valid ebay product URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "homedepot_products",
        "gd_lmusivh019i7g97q2n",
        [
            "Quickly read structured homedepot product data.",
            "Requires a valid homedepot product URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "zara_products",
        "gd_lct4vafw1tgx27d4o0",
        [
            "Quickly read structured zara product data.",
            "Requires a valid zara product URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "etsy_products",
        "gd_ltppk0jdv1jqz25mz",
        [
            "Quickly read structured etsy product data.",
            "Requires a valid etsy product URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "bestbuy_products",
        "gd_ltre1jqe1jfr7cccf",
        [
            "Quickly read structured bestbuy product data.",
            "Requires a valid bestbuy product URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "linkedin_person_profile",
        "gd_l1viktl72bvl7bjuj0",
        [
            "Quickly read structured linkedin people profile data.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "linkedin_company_profile",
        "gd_l1vikfnt1wgvvqz95w",
        [
            "Quickly read structured linkedin company profile data",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "linkedin_job_listings",
        "gd_lpfll7v5hcqtkxl6l",
        [
            "Quickly read structured linkedin job listings data",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "linkedin_posts",
        "gd_lyy3tktm25m4avu764",
        [
            "Quickly read structured linkedin posts data",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "linkedin_people_search",
        "gd_m8d03he47z8nwb5xc",
        [
            "Quickly read structured linkedin people search data",
            _CACHE_NOTE,
        ],
        ["url", "first_name", "last_name"],
    ),
    _dataset_entry(
        "crunchbase_company",
        "gd_l1vijqt9jfj7olije",
        [
            "Quickly read structured crunchbase company data",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "zoominfo_company_profile",
        "gd_m0ci4a4ivx3j5l6nx",
        [
            "Quickly read structured ZoomInfo company profile data.",
            "Requires a valid ZoomInfo company URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "instagram_profiles",
        "gd_l1vikfch901nx3by4",
        [
            "Quickly read structured Instagram profile data.",
            "Requires a valid Instagram URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "instagram_posts",
        "gd_lk5ns7kz21pck8jpis",
        [
            "Quickly read structured Instagram post data.",
            "Requires a valid Instagram URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "instagram_reels",
        "gd_lyclm20il4r5helnj",
        [
            "Quickly read structured Instagram reel data.",
            "Requires a valid Instagram URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "instagram_comments",
        "gd_ltppn085pokosxh13",
        [
            "Quickly read structured Instagram comments data.",
            "Requires a valid Instagram URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "facebook_posts",
        "gd_lyclm1571iy3mv57zw",
        [
            "Quickly read structured Facebook post data.",
            "Requires a valid Facebook post URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "facebook_marketplace_listings",
        "gd_lvt9iwuh6fbcwmx1a",
        [
            "Quickly read structured Facebook marketplace listing data.",
            "Requires a valid Facebook marketplace listing URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "facebook_company_reviews",
        "gd_m0dtqpiu1mbcyc2g86",
        [
            "Quickly read structured Facebook company reviews data.",
            "Requires a valid Facebook company URL and number of reviews.",
            _CACHE_NOTE,
        ],
        ["url", "num_of_reviews"],
    ),
    _dataset_entry(
        "facebook_events",
        "gd_m14sd0to1jz48ppm51",
        [
            "Quickly read structured Facebook events data.",
            "Requires a valid Facebook event URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "tiktok_profiles",
        "gd_l1villgoiiidt09ci",
        [
            "Quickly read structured Tiktok profiles data.",
            "Requires a valid Tiktok profile URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "tiktok_posts",
        "gd_lu702nij2f790tmv9h",
        [
            "Quickly read structured Tiktok post data.",
            "Requires a valid Tiktok post URL.",
            _CACHE_NOTE,
        ],
        ["url"],
    ),
    _dataset_entry(
        "tiktok_shop",
        "gd_m45m1u911dsa4274pi",
        [
            "Quickly read structured Tiktok shop data.",
            "Requires a valid Tiktok shop product URL.",
            # This entry's closing sentence is abbreviated in the source data.
            "This can be a cache lookup...",
        ],
        ["url"],
    ),
]
|
||||
|
||||
|
||||
class BrightDataDatasetTool(BaseTool):
    """
    CrewAI-compatible tool for scraping structured data using Bright Data Datasets.

    Values given to the constructor act as defaults; any argument passed to
    ``_run`` overrides the corresponding default for that call.

    Attributes:
        name (str): Tool name displayed in the CrewAI environment.
        description (str): Tool description shown to agents or users.
        args_schema (Type[BaseModel]): Pydantic schema for validating input arguments.
        dataset_type (Optional[str]): Default dataset identifier, matched against
            the ``id`` key of the module-level ``datasets`` entries.
        url (Optional[str]): Default target URL.
        format (str): Default output format requested for the snapshot.
        zipcode (Optional[str]): Default ZIP code sent with the request.
        additional_params (Optional[Dict[str, Any]]): Default extra request fields.
    """

    name: str = "Bright Data Dataset Tool"
    description: str = "Scrapes structured data using Bright Data Dataset API from a URL and optional input parameters"
    args_schema: Type[BaseModel] = BrightDataDatasetToolSchema
    dataset_type: Optional[str] = None
    url: Optional[str] = None
    format: str = "json"
    zipcode: Optional[str] = None
    additional_params: Optional[Dict[str, Any]] = None

    def __init__(
        self,
        dataset_type: Optional[str] = None,
        url: Optional[str] = None,
        format: str = "json",
        zipcode: Optional[str] = None,
        additional_params: Optional[Dict[str, Any]] = None,
    ):
        """Store the per-instance defaults used when ``_run`` omits an argument."""
        super().__init__()
        self.dataset_type = dataset_type
        self.url = url
        self.format = format
        self.zipcode = zipcode
        self.additional_params = additional_params

    def filter_dataset_by_id(self, target_id):
        """Return every entry of the module-level ``datasets`` list whose ``id`` equals ``target_id``."""
        return [dataset for dataset in datasets if dataset["id"] == target_id]

    async def get_dataset_data_async(
        self,
        dataset_type: str,
        output_format: str,
        url: str,
        zipcode: Optional[str] = None,
        additional_params: Optional[Dict[str, Any]] = None,
        polling_interval: int = 1,
    ) -> str:
        """
        Asynchronously trigger and poll Bright Data dataset scraping.

        Args:
            dataset_type (str): Bright Data Dataset Type.
            output_format (str): Value sent as the ``format`` query parameter
                when the finished snapshot is downloaded.
            url (str): Target URL to scrape.
            zipcode (Optional[str]): Optional ZIP code for geo-specific data.
            additional_params (Optional[Dict]): Extra API parameters.
            polling_interval (int): Time interval in seconds between polling attempts.

        Returns:
            str: Body text of the snapshot response, in ``output_format``.

        Raises:
            ValueError: If ``dataset_type`` does not match exactly one known dataset.
            BrightDataDatasetToolException: If any API step fails, the trigger
                response carries no ``snapshot_id``, or the job reports an error.
            TimeoutError: If polling times out before job completion.
        """
        request_data = {"url": url}
        if zipcode is not None:
            request_data["zipcode"] = zipcode

        # Set additional parameters dynamically depending upon the dataset that is being requested
        if additional_params:
            request_data.update(additional_params)

        api_key = os.getenv("BRIGHT_DATA_API_KEY")

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        dataset_id = ""
        dataset = self.filter_dataset_by_id(dataset_type)

        if len(dataset) == 1:
            dataset_id = dataset[0]["dataset_id"]
        else:
            raise ValueError(
                f"Unable to find the dataset for {dataset_type}. Please make sure to pass a valid one"
            )

        async with aiohttp.ClientSession() as session:
            # Step 1: Trigger job
            async with session.post(
                f"{BRIGHTDATA_API_URL}/datasets/v3/trigger",
                params={"dataset_id": dataset_id, "include_errors": "true"},
                json=[request_data],
                headers=headers,
            ) as trigger_response:
                if trigger_response.status != 200:
                    raise BrightDataDatasetToolException(
                        f"Trigger failed: {await trigger_response.text()}",
                        trigger_response.status,
                    )
                trigger_data = await trigger_response.json()
                print(trigger_data)
                snapshot_id = trigger_data.get("snapshot_id")
                # Without a snapshot id the polling URL would end in "/None";
                # fail here with the payload that was actually received.
                if not snapshot_id:
                    raise BrightDataDatasetToolException(
                        f"Trigger response did not include a snapshot_id: {trigger_data}",
                        trigger_response.status,
                    )

            # Step 2: Poll for completion
            # NOTE(review): `timeout` is not defined in this class; presumably a
            # module-level setting in seconds - confirm.
            elapsed = 0
            while elapsed < timeout:
                await asyncio.sleep(polling_interval)
                elapsed += polling_interval

                async with session.get(
                    f"{BRIGHTDATA_API_URL}/datasets/v3/progress/{snapshot_id}",
                    headers=headers,
                ) as status_response:
                    if status_response.status != 200:
                        raise BrightDataDatasetToolException(
                            f"Status check failed: {await status_response.text()}",
                            status_response.status,
                        )
                    status_data = await status_response.json()
                    if status_data.get("status") == "ready":
                        print("Job is ready")
                        break
                    elif status_data.get("status") == "error":
                        raise BrightDataDatasetToolException(
                            f"Job failed: {status_data}", 0
                        )
            else:
                # while/else: reached only when the loop ended without `break`.
                raise TimeoutError("Polling timed out before job completed.")

            # Step 3: Retrieve result
            async with session.get(
                f"{BRIGHTDATA_API_URL}/datasets/v3/snapshot/{snapshot_id}",
                params={"format": output_format},
                headers=headers,
            ) as snapshot_response:
                if snapshot_response.status != 200:
                    raise BrightDataDatasetToolException(
                        f"Result fetch failed: {await snapshot_response.text()}",
                        snapshot_response.status,
                    )

                return await snapshot_response.text()

    def _run(
        self,
        url: Optional[str] = None,
        dataset_type: Optional[str] = None,
        format: Optional[str] = None,
        zipcode: Optional[str] = None,
        additional_params: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Any:
        """
        Validate the inputs and run the trigger/poll/download cycle synchronously.

        Each argument falls back to the value stored on the instance when it is
        omitted or falsy.

        Returns:
            The snapshot text on success, or a descriptive error string when the
            API call times out or fails.

        Raises:
            ValueError: If ``dataset_type`` or ``url`` is missing, the output
                format is unsupported, or ``BRIGHT_DATA_API_KEY`` is not set.
        """
        dataset_type = dataset_type or self.dataset_type
        output_format = format or self.format
        url = url or self.url
        zipcode = zipcode or self.zipcode
        additional_params = additional_params or self.additional_params

        if not dataset_type:
            raise ValueError("dataset_type is required either in constructor or method call")
        if not url:
            raise ValueError("url is required either in constructor or method call")

        valid_output_formats = {"json", "ndjson", "jsonl", "csv"}
        if output_format not in valid_output_formats:
            # sorted() keeps the message stable; set iteration order is not.
            raise ValueError(
                f"Unsupported output format: {output_format}. Must be one of {', '.join(sorted(valid_output_formats))}."
            )

        api_key = os.getenv("BRIGHT_DATA_API_KEY")
        if not api_key:
            raise ValueError("BRIGHT_DATA_API_KEY environment variable is required.")

        try:
            return asyncio.run(
                self.get_dataset_data_async(
                    dataset_type=dataset_type,
                    output_format=output_format,
                    url=url,
                    zipcode=zipcode,
                    additional_params=additional_params,
                )
            )
        except TimeoutError as e:
            return f"Timeout Exception occured in method : get_dataset_data_async. Details - {str(e)}"
        except BrightDataDatasetToolException as e:
            return f"Exception occured in method : get_dataset_data_async. Details - {str(e)}"
        except Exception as e:
            return f"Bright Data API error: {str(e)}"
|
||||
207
crewai_tools/tools/brightdata_tool/brightdata_serp.py
Normal file
207
crewai_tools/tools/brightdata_tool/brightdata_serp.py
Normal file
@@ -0,0 +1,207 @@
|
||||
import os
|
||||
import urllib.parse
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class BrightDataConfig(BaseModel):
    """Endpoint settings for the Bright Data request API."""

    API_URL: str = "https://api.brightdata.com/request"

    @classmethod
    def from_env(cls):
        """Create a config whose endpoint is ``BRIGHTDATA_API_URL`` when set, else the default URL."""
        endpoint = os.environ.get(
            "BRIGHTDATA_API_URL", "https://api.brightdata.com/request"
        )
        return cls(API_URL=endpoint)
|
||||
|
||||
class BrightDataSearchToolSchema(BaseModel):
    """
    Input arguments accepted by BrightDataSearchTool.

    Attributes:
        query (str): Search query to execute (e.g., "latest AI news"). Required.
        search_engine (Optional[str]): Engine to query ("google", "bing", "yandex"). Defaults to "google".
        country (Optional[str]): Two-letter geo-targeting country code (e.g., "us", "in"). Defaults to "us".
        language (Optional[str]): Result language code (e.g., "en", "es"). Defaults to "en".
        search_type (Optional[str]): Kind of search, such as "isch" (images), "nws" (news) or "jobs". Defaults to None.
        device_type (Optional[str]): Device to simulate ("desktop", "mobile", "ios", "android"). Defaults to "desktop".
        parse_results (Optional[bool]): True for structured JSON, False for raw HTML. Defaults to True.
    """

    query: str = Field(..., description="Search query to perform")
    search_engine: Optional[str] = Field(
        description="Search engine domain (e.g., 'google', 'bing', 'yandex')",
        default="google",
    )
    country: Optional[str] = Field(
        description="Two-letter country code for geo-targeting (e.g., 'us', 'gb')",
        default="us",
    )
    language: Optional[str] = Field(
        description="Language code (e.g., 'en', 'es') used in the query URL",
        default="en",
    )
    search_type: Optional[str] = Field(
        description="Type of search (e.g., 'isch' for images, 'nws' for news)",
        default=None,
    )
    device_type: Optional[str] = Field(
        description="Device type to simulate (e.g., 'mobile', 'desktop', 'ios')",
        default="desktop",
    )
    parse_results: Optional[bool] = Field(
        description="Whether to parse and return JSON (True) or raw HTML/text (False)",
        default=True,
    )
|
||||
|
||||
|
||||
class BrightDataSearchTool(BaseTool):
    """
    A web search tool that utilizes Bright Data's SERP API to perform queries and return either structured results
    or raw page content from search engines like Google or Bing.

    Values given to the constructor act as defaults; any argument passed to
    ``_run`` overrides the corresponding default for that call.

    Attributes:
        name (str): Tool name used by the agent.
        description (str): A brief explanation of what the tool does.
        args_schema (Type[BaseModel]): Schema class for validating tool arguments.
        base_url (str): The Bright Data API endpoint used for making the POST request.
        api_key (str): Bright Data API key loaded from the environment variable 'BRIGHT_DATA_API_KEY'.
        zone (str): Zone identifier from Bright Data, loaded from the environment variable 'BRIGHT_DATA_ZONE'.

    Raises:
        ValueError: If API key or zone environment variables are not set.
    """

    name: str = "Bright Data SERP Search"
    description: str = "Tool to perform web search using Bright Data SERP API."
    args_schema: Type[BaseModel] = BrightDataSearchToolSchema
    _config = BrightDataConfig.from_env()
    base_url: str = ""
    api_key: str = ""
    zone: str = ""
    query: Optional[str] = None
    search_engine: str = "google"
    country: str = "us"
    language: str = "en"
    search_type: Optional[str] = None
    device_type: str = "desktop"
    parse_results: bool = True

    def __init__(
        self,
        query: Optional[str] = None,
        search_engine: str = "google",
        country: str = "us",
        language: str = "en",
        search_type: Optional[str] = None,
        device_type: str = "desktop",
        parse_results: bool = True,
    ):
        """Store per-instance defaults and load credentials from the environment.

        Raises:
            ValueError: If ``BRIGHT_DATA_API_KEY`` or ``BRIGHT_DATA_ZONE`` is unset.
        """
        super().__init__()
        self.base_url = self._config.API_URL
        self.query = query
        self.search_engine = search_engine
        self.country = country
        self.language = language
        self.search_type = search_type
        self.device_type = device_type
        self.parse_results = parse_results

        self.api_key = os.getenv("BRIGHT_DATA_API_KEY")
        self.zone = os.getenv("BRIGHT_DATA_ZONE")
        if not self.api_key:
            raise ValueError("BRIGHT_DATA_API_KEY environment variable is required.")
        if not self.zone:
            raise ValueError("BRIGHT_DATA_ZONE environment variable is required.")

    def get_search_url(self, engine: str, query: str):
        """Return the search URL for ``engine`` with ``query`` as its query string.

        ``query`` is inserted verbatim, so the caller must URL-encode it first.
        Any engine other than "yandex" or "bing" falls back to Google.
        """
        # The placeholders were previously written as "${query}", which in a
        # Python f-string emits a literal "$" in front of the query text.
        if engine == "yandex":
            return f"https://yandex.com/search/?text={query}"
        elif engine == "bing":
            return f"https://www.bing.com/search?q={query}"
        return f"https://www.google.com/search?q={query}"

    def _run(
        self,
        query: Optional[str] = None,
        search_engine: Optional[str] = None,
        country: Optional[str] = None,
        language: Optional[str] = None,
        search_type: Optional[str] = None,
        device_type: Optional[str] = None,
        parse_results: Optional[bool] = None,
        **kwargs,
    ) -> Any:
        """
        Executes a search query using Bright Data SERP API and returns results.

        Args:
            query (str): The search query string (URL encoded internally).
            search_engine (str): The search engine to use (default: "google").
            country (str): Country code for geotargeting (default: "us").
            language (str): Language code for the query (default: "en").
            search_type (str): Optional type of search such as "nws", "isch", "jobs".
            device_type (str): Optional device type to simulate (e.g., "mobile", "ios", "desktop").
            parse_results (bool): If True, returns structured data; else raw page (default: True).
            results_count (str or int): Number of search results to fetch (default: "10").
                Read from ``**kwargs``.

        Returns:
            str: The response body text from Bright Data, or an error message
            string if the request fails.

        Raises:
            ValueError: If no query is available from the call or the constructor.
        """

        query = query or self.query
        search_engine = search_engine or self.search_engine
        country = country or self.country
        language = language or self.language
        search_type = search_type or self.search_type
        device_type = device_type or self.device_type
        # `parse_results` is a bool, so only None (not False) means "use the default".
        parse_results = parse_results if parse_results is not None else self.parse_results
        results_count = kwargs.get("results_count", "10")

        # Validate required parameters
        if not query:
            raise ValueError("query is required either in constructor or method call")

        # Build the search URL
        query = urllib.parse.quote(query)
        url = self.get_search_url(search_engine, query)

        # Add parameters to the URL
        params = []

        if country:
            params.append(f"gl={country}")

        if language:
            params.append(f"hl={language}")

        if results_count:
            params.append(f"num={results_count}")

        if parse_results:
            params.append("brd_json=1")

        if search_type:
            if search_type == "jobs":
                params.append("ibp=htl;jobs")
            else:
                params.append(f"tbm={search_type}")

        if device_type:
            # "desktop" (or any other value) adds no device parameter.
            if device_type == "mobile":
                params.append("brd_mobile=1")
            elif device_type == "ios":
                params.append("brd_mobile=ios")
            elif device_type == "android":
                params.append("brd_mobile=android")

        # Combine parameters with the URL (the base URL already carries "?q=...")
        if params:
            url += "&" + "&".join(params)

        # Set up the API request parameters
        request_params = {"zone": self.zone, "url": url, "format": "raw"}

        request_params = {k: v for k, v in request_params.items() if v is not None}

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        try:
            response = requests.post(
                self.base_url, json=request_params, headers=headers
            )

            print(f"Status code: {response.status_code}")
            response.raise_for_status()

            return response.text

        except requests.RequestException as e:
            return f"Error performing BrightData search: {str(e)}"
        except Exception as e:
            return f"Error fetching results: {str(e)}"
|
||||
122
crewai_tools/tools/brightdata_tool/brightdata_unlocker.py
Normal file
122
crewai_tools/tools/brightdata_tool/brightdata_unlocker.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import os
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class BrightDataConfig(BaseModel):
    """Holds the Bright Data request endpoint used by the Web Unlocker tool."""

    API_URL: str = "https://api.brightdata.com/request"

    @classmethod
    def from_env(cls):
        """Return a config that reads ``BRIGHTDATA_API_URL``, falling back to the default endpoint."""
        api_url = os.environ.get(
            "BRIGHTDATA_API_URL",
            "https://api.brightdata.com/request",
        )
        return cls(API_URL=api_url)
|
||||
|
||||
class BrightDataUnlockerToolSchema(BaseModel):
    """
    Pydantic schema for input parameters used by the BrightDataWebUnlockerTool.

    This schema defines the structure and validation for parameters passed when performing
    a web scraping request using Bright Data's Web Unlocker.

    Attributes:
        url (str): The target URL to scrape.
        format (Optional[str]): Format of the response returned by Bright Data. Default 'raw' format.
        data_format (Optional[str]): Response data format ('markdown' by default). 'html' is the other option.
    """

    url: str = Field(..., description="URL to perform the web scraping")
    format: Optional[str] = Field(
        default="raw", description="Response format (raw is standard)"
    )
    # The description previously said "html by default", contradicting the
    # actual default below; the text now matches the behaviour.
    data_format: Optional[str] = Field(
        default="markdown",
        description="Response data format: 'markdown' (default) or 'html'",
    )
|
||||
|
||||
|
||||
class BrightDataWebUnlockerTool(BaseTool):
    """
    Scrape a web page through the Bright Data Web Unlocker API.

    The request is posted to the Bright Data endpoint together with the
    configured zone; the response body is returned as text. Constructor values
    act as defaults that ``_run`` arguments override.

    Attributes:
        name (str): Name of the tool.
        description (str): Description of what the tool does.
        args_schema (Type[BaseModel]): Pydantic model schema for expected input arguments.
        base_url (str): Endpoint of the Bright Data Web Unlocker API.
        api_key (str): Bright Data API key, read from the BRIGHT_DATA_API_KEY environment variable.
        zone (str): Bright Data zone identifier, read from the BRIGHT_DATA_ZONE environment variable.
        url (Optional[str]): Default target URL.
        format (str): Default response format.
        data_format (str): Default response data format ("markdown" or "html").
    """

    name: str = "Bright Data Web Unlocker Scraping"
    description: str = "Tool to perform web scraping using Bright Data Web Unlocker"
    args_schema: Type[BaseModel] = BrightDataUnlockerToolSchema
    _config = BrightDataConfig.from_env()
    base_url: str = ""
    api_key: str = ""
    zone: str = ""
    url: Optional[str] = None
    format: str = "raw"
    data_format: str = "markdown"

    def __init__(self, url: str = None, format: str = "raw", data_format: str = "markdown"):
        """Record the defaults and load the API key and zone from the environment.

        Raises:
            ValueError: If BRIGHT_DATA_API_KEY or BRIGHT_DATA_ZONE is not set.
        """
        super().__init__()
        self.base_url = self._config.API_URL
        self.url = url
        self.format = format
        self.data_format = data_format

        self.api_key = os.getenv("BRIGHT_DATA_API_KEY")
        self.zone = os.getenv("BRIGHT_DATA_ZONE")
        if not self.api_key:
            raise ValueError("BRIGHT_DATA_API_KEY environment variable is required.")
        if not self.zone:
            raise ValueError("BRIGHT_DATA_ZONE environment variable is required.")

    def _run(self, url: str = None, format: str = None, data_format: str = None, **kwargs: Any) -> Any:
        """Send the scraping request and return the response text.

        Arguments that are omitted or falsy fall back to the instance defaults.
        Request failures are reported as an error string rather than raised.

        Raises:
            ValueError: If no URL is available or ``data_format`` is unsupported.
        """
        target_url = url or self.url
        response_format = format or self.format
        body_format = data_format or self.data_format

        if not target_url:
            raise ValueError("url is required either in constructor or method call")

        valid_data_formats = {"html", "markdown"}
        if body_format not in valid_data_formats:
            raise ValueError(
                f"Unsupported data format: {body_format}. Must be one of {', '.join(valid_data_formats)}."
            )

        payload = {"url": target_url, "zone": self.zone, "format": response_format}
        # Only "markdown" is sent explicitly; for "html" the field is omitted.
        if body_format == "markdown":
            payload["data_format"] = "markdown"

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        try:
            response = requests.post(self.base_url, json=payload, headers=headers)
            print(f"Status Code: {response.status_code}")
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            return f"HTTP Error performing BrightData Web Unlocker Scrape: {e}\nResponse: {getattr(e.response, 'text', '')}"
        except Exception as e:
            return f"Error fetching results: {str(e)}"
|
||||
38
crewai_tools/tools/browserbase_load_tool/README.md
Normal file
38
crewai_tools/tools/browserbase_load_tool/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# BrowserbaseLoadTool
|
||||
|
||||
## Description
|
||||
|
||||
[Browserbase](https://browserbase.com) is a developer platform to reliably run, manage, and monitor headless browsers.
|
||||
|
||||
Power your AI data retrievals with:
|
||||
- [Serverless Infrastructure](https://docs.browserbase.com/under-the-hood) providing reliable browsers to extract data from complex UIs
|
||||
- [Stealth Mode](https://docs.browserbase.com/features/stealth-mode) with included fingerprinting tactics and automatic captcha solving
|
||||
- [Session Debugger](https://docs.browserbase.com/features/sessions) to inspect your Browser Session with networks timeline and logs
|
||||
- [Live Debug](https://docs.browserbase.com/guides/session-debug-connection/browser-remote-control) to quickly debug your automation
|
||||
|
||||
## Installation
|
||||
|
||||
- Get an API key and Project ID from [browserbase.com](https://browserbase.com) and set them as environment variables (`BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID`).
|
||||
- Install the [Browserbase SDK](http://github.com/browserbase/python-sdk) along with `crewai[tools]` package:
|
||||
|
||||
```
|
||||
pip install browserbase 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Utilize the BrowserbaseLoadTool as follows to allow your agent to load websites:
|
||||
|
||||
```python
|
||||
from crewai_tools import BrowserbaseLoadTool
|
||||
|
||||
tool = BrowserbaseLoadTool()
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `api_key` Optional. Browserbase API key. Default is `BROWSERBASE_API_KEY` env variable.
|
||||
- `project_id` Optional. Browserbase Project ID. Default is `BROWSERBASE_PROJECT_ID` env variable.
|
||||
- `text_content` Retrieve only text content. Default is `False`.
|
||||
- `session_id` Optional. Provide an existing Session ID.
|
||||
- `proxy` Optional. Enable/Disable Proxies.
|
||||
@@ -0,0 +1,67 @@
|
||||
import os
|
||||
from typing import Any, Optional, Type, List
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class BrowserbaseLoadToolSchema(BaseModel):
    # Input schema for BrowserbaseLoadTool: the single argument is the page address.
    url: str = Field(
        description="Website URL",
    )
|
||||
|
||||
|
||||
class BrowserbaseLoadTool(BaseTool):
    """Load a web page in a headless browser through Browserbase and return its contents.

    Attributes:
        api_key: Browserbase API key; defaults to the ``BROWSERBASE_API_KEY`` environment variable.
        project_id: Browserbase project ID; defaults to the ``BROWSERBASE_PROJECT_ID`` environment variable.
        text_content: Forwarded to ``Browserbase.load_url``.
        session_id: Forwarded to ``Browserbase.load_url``.
        proxy: Forwarded to ``Browserbase.load_url``.
        browserbase: The Browserbase client created during initialization.
    """

    name: str = "Browserbase web load tool"
    description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
    args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
    api_key: Optional[str] = os.getenv("BROWSERBASE_API_KEY")
    project_id: Optional[str] = os.getenv("BROWSERBASE_PROJECT_ID")
    text_content: Optional[bool] = False
    session_id: Optional[str] = None
    proxy: Optional[bool] = None
    browserbase: Optional[Any] = None
    package_dependencies: List[str] = ["browserbase"]
    env_vars: List[EnvVar] = [
        EnvVar(name="BROWSERBASE_API_KEY", description="API key for Browserbase services", required=False),
        EnvVar(name="BROWSERBASE_PROJECT_ID", description="Project ID for Browserbase services", required=False),
    ]

    def __init__(
        self,
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        text_content: Optional[bool] = False,
        session_id: Optional[str] = None,
        proxy: Optional[bool] = None,
        **kwargs,
    ):
        """Resolve credentials, make sure ``browserbase`` is importable and create the client.

        Args:
            api_key: Browserbase API key. Falls back to ``BROWSERBASE_API_KEY``.
            project_id: Browserbase project ID. Falls back to ``BROWSERBASE_PROJECT_ID``.
            text_content: Forwarded to ``Browserbase.load_url`` on each run.
            session_id: Forwarded to ``Browserbase.load_url`` on each run.
            proxy: Forwarded to ``Browserbase.load_url`` on each run.
            **kwargs: Passed through to ``BaseTool``.

        Raises:
            EnvironmentError: If no API key was passed and none is set in the environment.
            ImportError: If ``browserbase`` is missing and the user declines to install it.
        """
        super().__init__(**kwargs)

        # Explicit arguments were previously accepted but never applied, so a
        # key passed by the caller was ignored. Precedence is now: explicit
        # argument, then the class-level default (environment at import time),
        # then the environment as it is at construction time.
        self.api_key = api_key or self.api_key or os.getenv("BROWSERBASE_API_KEY")
        self.project_id = (
            project_id or self.project_id or os.getenv("BROWSERBASE_PROJECT_ID")
        )

        if not self.api_key:
            raise EnvironmentError(
                "BROWSERBASE_API_KEY environment variable is required for initialization"
            )

        try:
            from browserbase import Browserbase  # type: ignore
        except ImportError:
            import click

            # Offer to install the missing dependency interactively.
            if click.confirm(
                "`browserbase` package not found, would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "browserbase"], check=True)
                from browserbase import Browserbase  # type: ignore
            else:
                raise ImportError(
                    "`browserbase` package not found, please run `uv add browserbase`"
                )

        self.browserbase = Browserbase(api_key=self.api_key)
        self.text_content = text_content
        self.session_id = session_id
        self.proxy = proxy

    def _run(self, url: str):
        """Load ``url`` through the Browserbase client and return whatever ``load_url`` returns."""
        return self.browserbase.load_url(
            url, self.text_content, self.session_id, self.proxy
        )
|
||||
56
crewai_tools/tools/code_docs_search_tool/README.md
Normal file
56
crewai_tools/tools/code_docs_search_tool/README.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# CodeDocsSearchTool
|
||||
|
||||
## Description
|
||||
The CodeDocsSearchTool is a powerful RAG (Retrieval-Augmented Generation) tool designed for semantic searches within code documentation. It enables users to efficiently find specific information or topics within code documentation. By providing a `docs_url` during initialization, the tool narrows down the search to that particular documentation site. Alternatively, without a specific `docs_url`, it searches across a wide array of code documentation known or discovered throughout its execution, making it versatile for various documentation search needs.
|
||||
|
||||
## Installation
|
||||
To start using the CodeDocsSearchTool, first, install the crewai_tools package via pip:
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
Utilize the CodeDocsSearchTool as follows to conduct searches within code documentation:
|
||||
```python
|
||||
from crewai_tools import CodeDocsSearchTool
|
||||
|
||||
# To search any code documentation content if the URL is known or discovered during its execution:
|
||||
tool = CodeDocsSearchTool()
|
||||
|
||||
# OR
|
||||
|
||||
# To specifically focus your search on a given documentation site by providing its URL:
|
||||
tool = CodeDocsSearchTool(docs_url='https://docs.example.com/reference')
|
||||
```
|
||||
Note: Substitute 'https://docs.example.com/reference' with your target documentation URL. The search query itself (for example, 'How to use search tool') is supplied when the tool is run, so use one relevant to your needs.
|
||||
|
||||
## Arguments
|
||||
- `docs_url`: Optional. Specifies the URL of the code documentation to be searched. Providing this during the tool's initialization focuses the search on the specified documentation content.
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = CodeDocsSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
@@ -0,0 +1,56 @@
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
try:
|
||||
from embedchain.models.data_type import DataType
|
||||
EMBEDCHAIN_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDCHAIN_AVAILABLE = False
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
class FixedCodeDocsSearchToolSchema(BaseModel):
    """Input for CodeDocsSearchTool."""

    # Used on its own when the docs URL was fixed at construction time,
    # so the query is the only argument.
    search_query: str = Field(..., description="Mandatory search query you want to use to search the Code Docs content")
|
||||
|
||||
|
||||
class CodeDocsSearchToolSchema(FixedCodeDocsSearchToolSchema):
    """Input for CodeDocsSearchTool."""

    # Extends the fixed schema with the documentation site to search.
    docs_url: str = Field(
        ...,
        description="Mandatory docs_url path you want to search",
    )
|
||||
|
||||
|
||||
class CodeDocsSearchTool(RagTool):
    """Semantic search over code documentation sites, built on ``RagTool``."""

    name: str = "Search a Code Docs content"
    description: str = "A tool that can be used to semantic search a query from a Code Docs content."
    args_schema: Type[BaseModel] = CodeDocsSearchToolSchema

    def __init__(self, docs_url: Optional[str] = None, **kwargs):
        """Optionally pin the tool to one documentation site.

        When ``docs_url`` is given, the site is added as a source, the
        description is rewritten to mention it, and the argument schema is
        switched to the query-only variant.
        """
        super().__init__(**kwargs)
        pinned = docs_url is not None
        if pinned:
            self.add(docs_url)
            self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content."
            self.args_schema = FixedCodeDocsSearchToolSchema
            self._generate_description()

    def add(self, docs_url: str) -> None:
        """Register ``docs_url`` as a docs-site source.

        Raises:
            ImportError: If ``embedchain`` could not be imported.
        """
        if EMBEDCHAIN_AVAILABLE:
            super().add(docs_url, data_type=DataType.DOCS_SITE)
            return
        raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")

    def _run(
        self,
        search_query: str,
        docs_url: Optional[str] = None,
    ) -> str:
        """Add ``docs_url`` as a source when provided, then delegate the query to ``RagTool._run``."""
        if docs_url is not None:
            self.add(docs_url)
        result = super()._run(query=search_query)
        return result
|
||||
6
crewai_tools/tools/code_interpreter_tool/Dockerfile
Normal file
6
crewai_tools/tools/code_interpreter_tool/Dockerfile
Normal file
@@ -0,0 +1,6 @@
|
||||
FROM python:3.12-alpine
|
||||
|
||||
RUN pip install requests beautifulsoup4
|
||||
|
||||
# Set the working directory
|
||||
WORKDIR /workspace
|
||||
53
crewai_tools/tools/code_interpreter_tool/README.md
Normal file
53
crewai_tools/tools/code_interpreter_tool/README.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# CodeInterpreterTool
|
||||
|
||||
## Description
|
||||
This tool is used to give the Agent the ability to run code (Python3) from the code generated by the Agent itself. The code is executed in a sandboxed environment, so it is safe to run any code.
|
||||
|
||||
It is incredibly useful since it allows the Agent to generate code, run it in the same environment, get the result, and use it to make decisions.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Docker
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Remember that when using this tool, the code must be generated by the Agent itself and must be Python 3 code. The first run will take some time because the Docker image needs to be built.
|
||||
|
||||
```python
|
||||
from crewai_tools import CodeInterpreterTool
|
||||
|
||||
Agent(
|
||||
...
|
||||
tools=[CodeInterpreterTool()],
|
||||
)
|
||||
```
|
||||
|
||||
Or if you need to pass your own Dockerfile just do this
|
||||
|
||||
```python
|
||||
from crewai_tools import CodeInterpreterTool
|
||||
|
||||
Agent(
|
||||
...
|
||||
tools=[CodeInterpreterTool(user_dockerfile_path="<Dockerfile_path>")],
|
||||
)
|
||||
```
|
||||
|
||||
If it is difficult to connect to the Docker daemon automatically (especially for macOS users), you can set up the Docker host manually like this:
|
||||
|
||||
```python
|
||||
from crewai_tools import CodeInterpreterTool
|
||||
|
||||
Agent(
|
||||
...
|
||||
tools=[CodeInterpreterTool(user_docker_base_url="<Docker Host Base Url>",
|
||||
user_dockerfile_path="<Dockerfile_path>")],
|
||||
)
|
||||
|
||||
```
|
||||
@@ -0,0 +1,373 @@
|
||||
"""Code Interpreter Tool for executing Python code in isolated environments.
|
||||
|
||||
This module provides a tool for executing Python code either in a Docker container for
|
||||
safe isolation or directly in a restricted sandbox. It includes mechanisms for blocking
|
||||
potentially unsafe operations and importing restricted modules.
|
||||
"""
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from docker import DockerClient
|
||||
from docker import from_env as docker_from_env
|
||||
from docker.errors import ImageNotFound, NotFound
|
||||
from docker.models.containers import Container
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.printer import Printer
|
||||
|
||||
|
||||
class CodeInterpreterSchema(BaseModel):
    """Input arguments accepted by the CodeInterpreterTool.

    Both fields are required: the Python 3 source to execute and the list of
    libraries that the source depends on.
    """

    code: str = Field(
        ...,
        description=(
            "Python3 code used to be interpreted in the Docker container. "
            "ALWAYS PRINT the final result and the output of the code"
        ),
    )

    libraries_used: List[str] = Field(
        ...,
        description=(
            "List of libraries used in the code with proper installing names "
            "separated by commas. Example: numpy,pandas,beautifulsoup4"
        ),
    )
|
||||
|
||||
|
||||
class SandboxPython:
    """A restricted Python execution environment for running code.

    Code executed through :meth:`exec` sees a copy of the built-ins with the
    names in ``UNSAFE_BUILTINS`` removed and with ``__import__`` replaced by
    :meth:`restricted_import`, which refuses the packages listed in
    ``BLOCKED_MODULES``.

    NOTE(review): this is a deny-list running in the host process. It only
    blocks what is listed here, so it should be treated as a best-effort
    guard rather than a hard security boundary.
    """

    # Top-level packages that sandboxed code may not import.
    BLOCKED_MODULES = {
        "os",
        "sys",
        "subprocess",
        "shutil",
        "importlib",
        "inspect",
        "tempfile",
        "sysconfig",
        "builtins",
    }

    # Built-in names that are stripped from the sandboxed namespace.
    UNSAFE_BUILTINS = {
        "exec",
        "eval",
        "open",
        "compile",
        "input",
        "globals",
        "locals",
        "vars",
        "help",
        "dir",
    }

    @staticmethod
    def restricted_import(
        name: str,
        custom_globals: Optional[Dict[str, Any]] = None,
        custom_locals: Optional[Dict[str, Any]] = None,
        fromlist: Optional[List[str]] = None,
        level: int = 0,
    ) -> ModuleType:
        """Import a module unless its top-level package is blocked.

        Args:
            name: The (possibly dotted) name of the module to import.
            custom_globals: Global namespace to use.
            custom_locals: Local namespace to use.
            fromlist: List of items to import from the module.
            level: The level value passed to __import__.

        Returns:
            The imported module if allowed.

        Raises:
            ImportError: If the module, or the package it belongs to, is in
                the blocked modules list.
        """
        # `import os.path` reaches this hook as name="os.path" and binds the
        # top-level `os` package, so the check must look at the first dotted
        # component instead of the full name.
        top_level = name.partition(".")[0]
        if top_level in SandboxPython.BLOCKED_MODULES:
            raise ImportError(f"Importing '{name}' is not allowed.")
        return __import__(name, custom_globals, custom_locals, fromlist or (), level)

    @staticmethod
    def safe_builtins() -> Dict[str, Any]:
        """Build the built-ins mapping exposed to sandboxed code.

        Returns:
            A dictionary with every built-in except those named in
            ``UNSAFE_BUILTINS``, and with ``__import__`` pointing at
            :meth:`restricted_import`.
        """
        import builtins

        safe_builtins = {
            k: v
            for k, v in builtins.__dict__.items()
            if k not in SandboxPython.UNSAFE_BUILTINS
        }
        safe_builtins["__import__"] = SandboxPython.restricted_import
        return safe_builtins

    @staticmethod
    def exec(code: str, locals: Dict[str, Any]) -> None:
        """Execute Python code with the restricted built-ins.

        Args:
            code: The Python code to execute as a string.
            locals: A dictionary that will be used for local variable storage;
                names assigned by the code end up in it.
        """
        # Inside this function body `exec` is the real built-in: the class
        # scope (where this method is also called `exec`) is not visible here.
        exec(code, {"__builtins__": SandboxPython.safe_builtins()}, locals)
|
||||
|
||||
|
||||
class CodeInterpreterTool(BaseTool):
    """A tool for executing Python code in isolated environments.

    By default the code runs inside a Docker container. When Docker is not
    available the tool falls back to the in-process ``SandboxPython``
    sandbox, and when ``unsafe_mode`` is enabled the code runs directly on the
    host without any restriction.
    """

    name: str = "Code Interpreter"
    description: str = "Interprets Python3 code strings with a final print statement."
    args_schema: Type[BaseModel] = CodeInterpreterSchema
    # Tag of the image that is looked up, and built on demand, for Docker runs.
    default_image_tag: str = "code-interpreter:latest"
    # Fallback code used by `_run` when the call does not supply `code`.
    code: Optional[str] = None
    # Passed as `path` to the Docker image build when it exists on disk.
    user_dockerfile_path: Optional[str] = None
    # Docker daemon URL; when unset the client is configured from the environment.
    user_docker_base_url: Optional[str] = None
    # When True, code is executed on the host with no isolation at all.
    unsafe_mode: bool = False

    @staticmethod
    def _get_installed_package_path() -> str:
        """Gets the installation path of the crewai_tools package.

        Returns:
            The directory path where the package is installed.

        Raises:
            FileNotFoundError: If the package location cannot be determined.
        """
        spec = importlib.util.find_spec("crewai_tools")
        # find_spec returns None for an unknown package, and namespace
        # packages have no origin; fail with a clear message in both cases.
        if spec is None or spec.origin is None:
            raise FileNotFoundError(
                "Could not locate the installed crewai_tools package."
            )
        return os.path.dirname(spec.origin)

    def _get_docker_client(self) -> DockerClient:
        """Builds a Docker client, honouring `user_docker_base_url` when set."""
        if self.user_docker_base_url is None:
            return docker_from_env()
        return DockerClient(base_url=self.user_docker_base_url)

    def _verify_docker_image(self) -> None:
        """Verifies if the Docker image is available or builds it if necessary.

        Checks if the required Docker image exists. If not, builds it using either a
        user-provided Dockerfile or the default one included with the package.

        Raises:
            FileNotFoundError: If the Dockerfile cannot be found.
        """
        client = self._get_docker_client()

        try:
            client.images.get(self.default_image_tag)
        except ImageNotFound:
            if self.user_dockerfile_path and os.path.exists(self.user_dockerfile_path):
                dockerfile_path = self.user_dockerfile_path
            else:
                # Fall back to the build context shipped inside the package.
                package_path = self._get_installed_package_path()
                dockerfile_path = os.path.join(
                    package_path, "tools/code_interpreter_tool"
                )
                if not os.path.exists(dockerfile_path):
                    raise FileNotFoundError(
                        f"Dockerfile not found in {dockerfile_path}"
                    )

            client.images.build(
                path=dockerfile_path,
                tag=self.default_image_tag,
                rm=True,
            )

    def _run(self, **kwargs) -> str:
        """Runs the code interpreter tool with the provided arguments.

        Args:
            **kwargs: Keyword arguments that should include 'code' and 'libraries_used'.

        Returns:
            The output of the executed code as a string.
        """
        code = kwargs.get("code", self.code)
        # Treat an explicit None the same as an omitted library list.
        libraries_used = kwargs.get("libraries_used") or []

        if code is None:
            return "No code was provided to execute."

        if self.unsafe_mode:
            return self.run_code_unsafe(code, libraries_used)
        return self.run_code_safety(code, libraries_used)

    def _install_libraries(self, container: Container, libraries: List[str]) -> None:
        """Installs required Python libraries in the Docker container.

        Args:
            container: The Docker container where libraries will be installed.
            libraries: A list of library names to install using pip.
        """
        for library in libraries:
            container.exec_run(["pip", "install", library])

    def _init_docker_container(self) -> Container:
        """Initializes and returns a Docker container for code execution.

        Stops and removes any existing container with the same name before creating
        a new one. Maps the current working directory to /workspace in the container.

        Returns:
            A Docker container object ready for code execution.
        """
        container_name = "code-interpreter"
        # Use the same daemon as the image check so that a custom
        # `user_docker_base_url` applies to the container as well.
        client = self._get_docker_client()
        current_path = os.getcwd()

        # Check if the container is already running
        try:
            existing_container = client.containers.get(container_name)
            existing_container.stop()
            existing_container.remove()
        except NotFound:
            pass  # Container does not exist, no need to remove

        return client.containers.run(
            self.default_image_tag,
            detach=True,
            tty=True,
            working_dir="/workspace",
            name=container_name,
            volumes={current_path: {"bind": "/workspace", "mode": "rw"}},  # type: ignore
        )

    def _check_docker_available(self) -> bool:
        """Checks if Docker is available and running on the system.

        Attempts to run the 'docker info' command to verify Docker availability.
        Prints appropriate messages if Docker is not installed or not running.

        Returns:
            True if Docker is available and running, False otherwise.
        """
        import subprocess

        try:
            subprocess.run(
                ["docker", "info"],
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=1,
            )
            return True
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
            Printer.print(
                "Docker is installed but not running or inaccessible.",
                color="bold_purple",
            )
            return False
        except FileNotFoundError:
            # The `docker` executable itself could not be found.
            Printer.print("Docker is not installed", color="bold_purple")
            return False

    def run_code_safety(self, code: str, libraries_used: List[str]) -> str:
        """Runs code in the safest available environment.

        Attempts to run code in Docker if available, falls back to a restricted
        sandbox if Docker is not available.

        Args:
            code: The Python code to execute as a string.
            libraries_used: A list of Python library names to install before execution.

        Returns:
            The output of the executed code as a string.
        """
        if self._check_docker_available():
            return self.run_code_in_docker(code, libraries_used)
        return self.run_code_in_restricted_sandbox(code)

    def run_code_in_docker(self, code: str, libraries_used: List[str]) -> str:
        """Runs Python code in a Docker container for safe isolation.

        Creates a Docker container, installs the required libraries, executes the code,
        and then cleans up by stopping and removing the container.

        Args:
            code: The Python code to execute as a string.
            libraries_used: A list of Python library names to install before execution.

        Returns:
            The output of the executed code as a string, or an error message if execution failed.
        """
        Printer.print("Running code in Docker environment", color="bold_blue")
        self._verify_docker_image()
        container = self._init_docker_container()

        try:
            self._install_libraries(container, libraries_used)
            exec_result = container.exec_run(["python3", "-c", code])
        finally:
            # Always tear the container down, even when installing or
            # executing raised, so no container is left running.
            container.stop()
            container.remove()

        if exec_result.exit_code != 0:
            return f"Something went wrong while running the code: \n{exec_result.output.decode('utf-8')}"
        return exec_result.output.decode("utf-8")

    def run_code_in_restricted_sandbox(self, code: str) -> str:
        """Runs Python code in a restricted sandbox environment.

        Executes the code with restricted access to potentially dangerous modules and
        built-in functions for basic safety when Docker is not available.

        Args:
            code: The Python code to execute as a string.

        Returns:
            The value of the 'result' variable from the executed code,
            or an error message if execution failed.
        """
        Printer.print("Running code in restricted sandbox", color="yellow")
        exec_locals: Dict[str, Any] = {}
        try:
            SandboxPython.exec(code=code, locals=exec_locals)
            return exec_locals.get("result", "No result variable found.")
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def run_code_unsafe(self, code: str, libraries_used: List[str]) -> str:
        """Runs code directly on the host machine without any safety restrictions.

        WARNING: This mode is unsafe and should only be used in trusted environments
        with code from trusted sources.

        Args:
            code: The Python code to execute as a string.
            libraries_used: A list of Python library names to install before execution.

        Returns:
            The value of the 'result' variable from the executed code,
            or an error message if execution failed.
        """
        import subprocess
        import sys

        Printer.print("WARNING: Running code in unsafe mode", color="bold_magenta")
        # Install libraries on the host machine. An argument list (no shell)
        # keeps a library name such as "x; rm -rf ~" from being run as a
        # shell command, and `sys.executable -m pip` targets this interpreter.
        for library in libraries_used:
            subprocess.run(
                [sys.executable, "-m", "pip", "install", library],
                check=False,
            )

        # Execute the code.
        # SECURITY: this is a deliberate, unrestricted exec of generated code
        # on the host; it is only reached when `unsafe_mode` is enabled.
        try:
            exec_locals: Dict[str, Any] = {}
            exec(code, {}, exec_locals)
            return exec_locals.get("result", "No result variable found.")
        except Exception as e:
            return f"An error occurred: {str(e)}"
|
||||
72
crewai_tools/tools/composio_tool/README.md
Normal file
72
crewai_tools/tools/composio_tool/README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# ComposioTool Documentation
|
||||
|
||||
## Description
|
||||
|
||||
This tool is a wrapper around the Composio toolset and gives your agent access to a wide variety of tools from the Composio SDK.
|
||||
|
||||
## Installation
|
||||
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
|
||||
```shell
|
||||
pip install composio-core
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
After the installation is complete, either run `composio login` or export your Composio API key as `COMPOSIO_API_KEY`.
|
||||
|
||||
## Example
|
||||
|
||||
The following example demonstrates how to initialize the tool and execute a github action:
|
||||
|
||||
1. Initialize toolset
|
||||
|
||||
```python
|
||||
from composio import Action, App
|
||||
from crewai_tools import ComposioTool
|
||||
from crewai import Agent, Task
|
||||
|
||||
|
||||
tools = [ComposioTool.from_action(action=Action.GITHUB_ACTIVITY_STAR_REPO_FOR_AUTHENTICATED_USER)]
|
||||
```
|
||||
|
||||
If you don't know what action you want to use, use `from_app` and `tags` filter to get relevant actions
|
||||
|
||||
```python
|
||||
tools = ComposioTool.from_app(App.GITHUB, tags=["important"])
|
||||
```
|
||||
|
||||
or use `use_case` to search relevant actions
|
||||
|
||||
```python
|
||||
tools = ComposioTool.from_app(App.GITHUB, use_case="Star a github repository")
|
||||
```
|
||||
|
||||
2. Define agent
|
||||
|
||||
```python
|
||||
crewai_agent = Agent(
|
||||
role="Github Agent",
|
||||
goal="You take action on Github using Github APIs",
|
||||
backstory=(
|
||||
"You are AI agent that is responsible for taking actions on Github "
|
||||
"on users behalf. You need to take action on Github using Github APIs"
|
||||
),
|
||||
verbose=True,
|
||||
tools=tools,
|
||||
)
|
||||
```
|
||||
|
||||
3. Execute task
|
||||
|
||||
```python
|
||||
task = Task(
|
||||
description="Star a repo ComposioHQ/composio on GitHub",
|
||||
agent=crewai_agent,
|
||||
expected_output="if the star happened",
|
||||
)
|
||||
|
||||
task.execute()
|
||||
```
|
||||
|
||||
* More detailed list of tools can be found [here](https://app.composio.dev)
|
||||
124
crewai_tools/tools/composio_tool/composio_tool.py
Normal file
124
crewai_tools/tools/composio_tool/composio_tool.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
Composio tools wrapper.
|
||||
"""
|
||||
|
||||
import typing as t
|
||||
|
||||
import typing_extensions as te
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
|
||||
|
||||
class ComposioTool(BaseTool):
    """Expose a Composio action as a crewAI tool."""

    # Callable invoked by `_run`; `from_action` sets it to a closure that
    # executes the wrapped action through a ComposioToolSet.
    composio_action: t.Callable
    env_vars: t.List[EnvVar] = [
        EnvVar(
            name="COMPOSIO_API_KEY",
            description="API key for Composio services",
            required=True,
        ),
    ]

    def _run(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
        """Forward all arguments to the wrapped composio action."""
        return self.composio_action(*args, **kwargs)

    @staticmethod
    def _check_connected_account(tool: t.Any, toolset: t.Any) -> None:
        """Ensure a connected account exists for actions that need one.

        Actions flagged `no_auth` are accepted immediately.

        Raises:
            RuntimeError: If none of the toolset's connected accounts belongs
                to the action's app.
        """
        from composio import Action
        from composio.client.collections import ConnectedAccountModel

        tool = t.cast(Action, tool)
        if tool.no_auth:
            return

        connections = t.cast(
            t.List[ConnectedAccountModel],
            toolset.client.connected_accounts.get(),
        )
        connected_apps = [connection.appUniqueId for connection in connections]
        if tool.app in connected_apps:
            return

        raise RuntimeError(
            f"No connected account found for app `{tool.app}`; "
            f"Run `composio add {tool.app}` to fix this"
        )

    @classmethod
    def from_action(
        cls,
        action: t.Any,
        **kwargs: t.Any,
    ) -> te.Self:
        """Build a crewAI tool around one composio action.

        Args:
            action: An `Action`, or a value accepted by the `Action`
                constructor.
            **kwargs: Extra arguments for the tool constructor. An optional
                `entity_id` entry is removed and used when the action is
                executed.

        Returns:
            A tool named and described after the action's schema.
        """
        from composio import Action, ComposioToolSet
        from composio.constants import DEFAULT_ENTITY_ID
        from composio.utils.shared import json_schema_to_model

        toolset = ComposioToolSet()
        if not isinstance(action, Action):
            action = Action(action)
        action = t.cast(Action, action)

        cls._check_connected_account(tool=action, toolset=toolset)

        (action_schema,) = toolset.get_action_schemas(actions=[action])
        schema = action_schema.model_dump(exclude_none=True)
        entity_id = kwargs.pop("entity_id", DEFAULT_ENTITY_ID)

        def function(**kwargs: t.Any) -> t.Dict:
            """Wrapper function for composio action."""
            return toolset.execute_action(
                action=Action(schema["name"]),
                params=kwargs,
                entity_id=entity_id,
            )

        # Make the closure present itself as the action it wraps.
        function.__name__ = schema["name"]
        function.__doc__ = schema["description"]

        parameters = action_schema.parameters.model_dump(exclude_none=True)
        return cls(
            name=schema["name"],
            description=schema["description"],
            args_schema=json_schema_to_model(parameters),
            composio_action=function,
            **kwargs,
        )

    @classmethod
    def from_app(
        cls,
        *apps: t.Any,
        tags: t.Optional[t.List[str]] = None,
        use_case: t.Optional[str] = None,
        **kwargs: t.Any,
    ) -> t.List[te.Self]:
        """Build one tool per action found for the given apps.

        Exactly one of `tags` and `use_case` must be provided; it selects how
        the actions are looked up.

        Raises:
            ValueError: If no app is given, or if neither or both of `tags`
                and `use_case` are given.
        """
        if not apps:
            raise ValueError("You need to provide at least one app name")

        has_use_case = use_case is not None
        has_tags = tags is not None

        if not (has_use_case or has_tags):
            raise ValueError("Both `use_case` and `tags` cannot be `None`")

        if has_use_case and has_tags:
            raise ValueError(
                "Cannot use both `use_case` and `tags` to filter the actions"
            )

        from composio import ComposioToolSet

        toolset = ComposioToolSet()
        if has_use_case:
            actions = toolset.find_actions_by_use_case(*apps, use_case=use_case)
        else:
            actions = toolset.find_actions_by_tags(*apps, tags=tags)

        return [cls.from_action(action=action, **kwargs) for action in actions]
|
||||
58
crewai_tools/tools/contextualai_create_agent_tool/README.md
Normal file
58
crewai_tools/tools/contextualai_create_agent_tool/README.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# ContextualAICreateAgentTool
|
||||
|
||||
## Description
|
||||
This tool is designed to integrate Contextual AI's enterprise-grade RAG agents with CrewAI. This tool enables you to create a new Contextual RAG agent. It uploads your documents to create a datastore and returns the Contextual agent ID and datastore ID.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
|
||||
```
|
||||
pip install 'crewai[tools]' contextual-client
|
||||
```
|
||||
|
||||
**Note**: You'll need a Contextual AI API key. Sign up at [app.contextual.ai](https://app.contextual.ai) to get your free API key.
|
||||
|
||||
## Example
|
||||
|
||||
```python
|
||||
from crewai_tools import ContextualAICreateAgentTool
|
||||
|
||||
# Initialize the tool
|
||||
tool = ContextualAICreateAgentTool(api_key="your_api_key_here")
|
||||
|
||||
# Create agent with documents
|
||||
result = tool._run(
|
||||
agent_name="Financial Analysis Agent",
|
||||
agent_description="Agent for analyzing financial documents",
|
||||
datastore_name="Financial Reports",
|
||||
document_paths=["/path/to/report1.pdf", "/path/to/report2.pdf"],
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Parameters
|
||||
- `api_key`: Your Contextual AI API key
|
||||
- `agent_name`: Name for the new agent
|
||||
- `agent_description`: Description of the agent's purpose
|
||||
- `datastore_name`: Name for the document datastore
|
||||
- `document_paths`: List of file paths to upload
|
||||
|
||||
Example result:
|
||||
|
||||
```
|
||||
Successfully created agent 'Financial Analysis Agent' with ID: {created_agent_ID} and datastore ID: {created_datastore_ID}. Uploaded 2 documents.
|
||||
```
|
||||
|
||||
You can use `ContextualAIQueryTool` with the returned IDs to query the knowledge base and retrieve relevant information from your documents.
|
||||
|
||||
## Key Features
|
||||
- **Complete Pipeline Setup**: Creates datastore, uploads documents, and configures agent in one operation
|
||||
- **Document Processing**: Leverages Contextual AI's powerful parser to ingest complex PDFs and documents
|
||||
- **Vector Storage**: Use Contextual AI's datastore for large document collections
|
||||
|
||||
## Use Cases
|
||||
- Set up new RAG agents from scratch with complete automation
|
||||
- Upload and organize document collections into structured datastores
|
||||
- Create specialized domain agents for legal, financial, technical, or research workflows
|
||||
|
||||
For more detailed information about Contextual AI's capabilities, visit the [official documentation](https://docs.contextual.ai).
|
||||
@@ -0,0 +1,71 @@
|
||||
from typing import Any, Optional, Type, List
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
import os
|
||||
|
||||
|
||||
class ContextualAICreateAgentSchema(BaseModel):
    """Arguments for the Contextual AI create-agent tool.

    All four fields are required.
    """

    agent_name: str = Field(
        ...,
        description="Name for the new agent",
    )
    agent_description: str = Field(
        ...,
        description="Description for the new agent",
    )
    datastore_name: str = Field(
        ...,
        description="Name for the new datastore",
    )
    document_paths: List[str] = Field(
        ...,
        description="List of file paths to upload",
    )
|
||||
|
||||
|
||||
class ContextualAICreateAgentTool(BaseTool):
    """Tool to create Contextual AI RAG agents with documents."""

    name: str = "Contextual AI Create Agent Tool"
    description: str = "Create a new Contextual AI RAG agent with documents and datastore"
    args_schema: Type[BaseModel] = ContextualAICreateAgentSchema

    # API key handed to the ContextualAI client in __init__.
    api_key: str
    # ContextualAI client instance; populated by __init__.
    contextual_client: Any = None
    package_dependencies: List[str] = ["contextual-client"]

    def __init__(self, **kwargs):
        """Validate the fields and create the Contextual AI client.

        Raises:
            ImportError: If the `contextual-client` package is not installed.
        """
        super().__init__(**kwargs)
        # Keep the try body to the import only, so that errors raised while
        # constructing the client are not mistaken for a missing package.
        try:
            from contextual import ContextualAI
        except ImportError as exc:
            raise ImportError(
                "contextual-client package is required. Install it with: pip install contextual-client"
            ) from exc
        self.contextual_client = ContextualAI(api_key=self.api_key)

    def _run(
        self,
        agent_name: str,
        agent_description: str,
        datastore_name: str,
        document_paths: List[str],
    ) -> str:
        """Create a complete RAG pipeline with documents.

        Creates a datastore, ingests every document into it and then creates
        an agent attached to that datastore.

        Args:
            agent_name: Name for the new agent.
            agent_description: Description for the new agent.
            datastore_name: Name for the new datastore.
            document_paths: Paths of the files to upload.

        Returns:
            A success message containing the agent ID, the datastore ID and
            the number of uploaded documents, or a failure message describing
            the error. Errors are reported in the returned string instead of
            being raised.
        """
        try:
            # Check every path before touching the remote service, so that a
            # missing file does not leave behind an orphaned, partially
            # populated datastore.
            for doc_path in document_paths:
                if not os.path.exists(doc_path):
                    raise FileNotFoundError(f"Document not found: {doc_path}")

            # Create datastore
            datastore = self.contextual_client.datastores.create(name=datastore_name)
            datastore_id = datastore.id

            # Upload documents
            document_ids = []
            for doc_path in document_paths:
                with open(doc_path, "rb") as f:
                    ingestion_result = self.contextual_client.datastores.documents.ingest(
                        datastore_id, file=f
                    )
                    document_ids.append(ingestion_result.id)

            # Create agent
            agent = self.contextual_client.agents.create(
                name=agent_name,
                description=agent_description,
                datastore_ids=[datastore_id],
            )

            return f"Successfully created agent '{agent_name}' with ID: {agent.id} and datastore ID: {datastore_id}. Uploaded {len(document_ids)} documents."

        except Exception as e:
            return f"Failed to create agent with documents: {str(e)}"
|
||||
68
crewai_tools/tools/contextualai_parse_tool/README.md
Normal file
68
crewai_tools/tools/contextualai_parse_tool/README.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# ContextualAIParseTool
|
||||
|
||||
## Description
|
||||
This tool is designed to integrate Contextual AI's enterprise-grade document parsing capabilities with CrewAI, enabling you to leverage advanced AI-powered document understanding for complex layouts, tables, and figures. Use this tool to extract structured content from your documents using Contextual AI's powerful document parser.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
|
||||
```
|
||||
pip install 'crewai[tools]' contextual-client
|
||||
```
|
||||
|
||||
**Note**: You'll need a Contextual AI API key. Sign up at [app.contextual.ai](https://app.contextual.ai) to get your free API key.
|
||||
|
||||
## Example
|
||||
|
||||
```python
|
||||
from crewai_tools import ContextualAIParseTool
|
||||
|
||||
tool = ContextualAIParseTool(api_key="your_api_key_here")
|
||||
|
||||
result = tool._run(
|
||||
file_path="/path/to/document.pdf",
|
||||
parse_mode="standard",
|
||||
page_range="0-5",
|
||||
output_types=["markdown-per-page"]
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
The result will show the parsed contents of your document. For example:
|
||||
```
|
||||
{
|
||||
"file_name": "attention_is_all_you_need.pdf",
|
||||
"status": "completed",
|
||||
"pages": [
|
||||
{
|
||||
"index": 0,
|
||||
"markdown": "Provided proper attribution ..."
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"markdown": "## 1 Introduction ..."
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
## Parameters
|
||||
- `api_key`: Your Contextual AI API key
|
||||
- `file_path`: Path to document to parse
|
||||
- `parse_mode`: Parsing mode (default: "standard")
|
||||
- `figure_caption_mode`: Figure caption handling (default: "concise")
|
||||
- `enable_document_hierarchy`: Enable hierarchy detection (default: True)
|
||||
- `page_range`: Pages to parse (e.g., "0-5", None for all)
|
||||
- `output_types`: Output formats (default: ["markdown-per-page"])
|
||||
|
||||
## Key Features
|
||||
- **Advanced Document Understanding**: Handles complex PDF layouts, tables, and multi-column documents
|
||||
- **Figure and Table Extraction**: Intelligent extraction of figures, charts, and tabular data
|
||||
- **Page Range Selection**: Parse specific pages or entire documents
|
||||
|
||||
## Use Cases
|
||||
- Extract structured content from complex PDFs and research papers
|
||||
- Parse financial reports, legal documents, and technical manuals
|
||||
- Convert documents to markdown for further processing in RAG pipelines
|
||||
|
||||
For more detailed information about Contextual AI's capabilities, visit the [official documentation](https://docs.contextual.ai).
|
||||
@@ -0,0 +1,92 @@
|
||||
from typing import Any, Optional, Type, List
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ContextualAIParseSchema(BaseModel):
    """Arguments for the Contextual AI document-parse tool.

    Only `file_path` is required; every other field has a default.
    """

    file_path: str = Field(
        ...,
        description="Path to the document to parse",
    )
    parse_mode: str = Field(
        default="standard",
        description="Parsing mode",
    )
    figure_caption_mode: str = Field(
        default="concise",
        description="Figure caption mode",
    )
    enable_document_hierarchy: bool = Field(
        default=True,
        description="Enable document hierarchy",
    )
    page_range: Optional[str] = Field(
        default=None,
        description="Page range to parse (e.g., '0-5')",
    )
    output_types: List[str] = Field(
        default=["markdown-per-page"],
        description="List of output types",
    )
|
||||
|
||||
|
||||
class ContextualAIParseTool(BaseTool):
    """Tool to parse documents using Contextual AI's parser."""

    name: str = "Contextual AI Document Parser"
    description: str = "Parse documents using Contextual AI's advanced document parser"
    args_schema: Type[BaseModel] = ContextualAIParseSchema

    # Sent as a bearer token in the Authorization header of every request.
    api_key: str
    package_dependencies: List[str] = ["contextual-client"]

    def _run(
        self,
        file_path: str,
        parse_mode: str = "standard",
        figure_caption_mode: str = "concise",
        enable_document_hierarchy: bool = True,
        page_range: Optional[str] = None,
        output_types: Optional[List[str]] = None,
    ) -> str:
        """Parse a document using Contextual AI's parser.

        Submits the file as a parse job, polls the job status every five
        seconds until it is "completed" or "failed", then fetches the results.

        Args:
            file_path: Path to the document to parse.
            parse_mode: Parsing mode sent with the job.
            figure_caption_mode: Figure caption mode sent with the job.
            enable_document_hierarchy: Whether to enable document hierarchy.
            page_range: Page range to parse (e.g. "0-5"); omitted from the
                request when empty or None.
            output_types: Output types to request. Defaults to
                ["markdown-per-page"] when None.

        Returns:
            The parse results as an indented JSON string, or a failure message
            describing the error. Errors are reported in the returned string
            instead of being raised.
        """
        try:
            import json
            import os
            from time import sleep

            import requests

            # A None sentinel replaces the former mutable list default, which
            # was a single list object shared by every call.
            if output_types is None:
                output_types = ["markdown-per-page"]

            if not os.path.exists(file_path):
                raise FileNotFoundError(f"Document not found: {file_path}")

            base_url = "https://api.contextual.ai/v1"
            headers = {
                "accept": "application/json",
                "authorization": f"Bearer {self.api_key}",
            }

            # Submit parse job
            url = f"{base_url}/parse"
            config = {
                "parse_mode": parse_mode,
                "figure_caption_mode": figure_caption_mode,
                "enable_document_hierarchy": enable_document_hierarchy,
            }

            if page_range:
                config["page_range"] = page_range

            with open(file_path, "rb") as fp:
                file = {"raw_file": fp}
                result = requests.post(url, headers=headers, data=config, files=file)
            # Report HTTP errors (bad key, quota, ...) as such, instead of
            # failing later with an opaque KeyError on 'job_id'.
            result.raise_for_status()
            job_id = result.json()["job_id"]

            # Monitor job status
            status_url = f"{base_url}/parse/jobs/{job_id}/status"
            while True:
                result = requests.get(status_url, headers=headers)
                result.raise_for_status()
                parse_status = result.json()["status"]

                if parse_status == "completed":
                    break
                if parse_status == "failed":
                    raise RuntimeError("Document parsing failed")

                sleep(5)

            # Get parse results
            results_url = f"{base_url}/parse/jobs/{job_id}/results"
            result = requests.get(
                results_url,
                headers=headers,
                params={"output_types": ",".join(output_types)},
            )
            result.raise_for_status()

            return json.dumps(result.json(), indent=2)

        except Exception as e:
            return f"Failed to parse document: {str(e)}"
|
||||
54
crewai_tools/tools/contextualai_query_tool/README.md
Normal file
54
crewai_tools/tools/contextualai_query_tool/README.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# ContextualAIQueryTool
|
||||
|
||||
## Description
|
||||
This tool is designed to integrate Contextual AI's enterprise-grade RAG agents with CrewAI. Run this tool to query existing Contextual AI RAG agents that have been pre-configured with documents and knowledge bases.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]' contextual-client
|
||||
```
|
||||
|
||||
**Note**: You'll need a Contextual AI API key. Sign up at [app.contextual.ai](https://app.contextual.ai) to get your free API key.
|
||||
|
||||
## Example
|
||||
|
||||
Make sure you have already created a Contextual agent and ingested documents into the datastore before using this tool.
|
||||
|
||||
```python
|
||||
from crewai_tools import ContextualAIQueryTool
|
||||
|
||||
# Initialize the tool
|
||||
tool = ContextualAIQueryTool(api_key="your_api_key_here")
|
||||
|
||||
# Query the agent with IDs
|
||||
result = tool._run(
|
||||
query="What are the key findings in the financial report?",
|
||||
agent_id="your_agent_id_here",
|
||||
datastore_id="your_datastore_id_here" # Optional: for document readiness checking
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
The result will contain the generated answer to the user's query.
|
||||
|
||||
## Parameters
|
||||
**Initialization:**
|
||||
- `api_key`: Your Contextual AI API key
|
||||
|
||||
**Query (_run method):**
|
||||
- `query`: The question or query to send to the agent
|
||||
- `agent_id`: ID of the existing Contextual AI agent to query (required)
|
||||
- `datastore_id`: Optional datastore ID for document readiness verification (if not provided, document status checking is disabled with a warning)
|
||||
|
||||
## Key Features
|
||||
- **Document Readiness Checking**: Automatically waits for documents to be processed before querying
|
||||
- **Grounded Responses**: Built-in grounding ensures factual, source-attributed answers
|
||||
|
||||
## Use Cases
|
||||
- Query pre-configured RAG agents with document collections
|
||||
- Access enterprise knowledge bases through user queries
|
||||
- Build specialized domain experts with access to curated documents
|
||||
|
||||
For more detailed information about Contextual AI's capabilities, visit the [official documentation](https://docs.contextual.ai).
|
||||
@@ -0,0 +1,99 @@
|
||||
from typing import Any, Optional, Type, List
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
import asyncio
|
||||
import requests
|
||||
import os
|
||||
|
||||
|
||||
class ContextualAIQuerySchema(BaseModel):
    """Arguments accepted by the Contextual AI query tool.

    ``query`` and ``agent_id`` are mandatory; ``datastore_id`` may be omitted.
    """

    # Text sent to the agent.
    query: str = Field(
        ...,
        description="Query to send to the Contextual AI agent.",
    )
    # Identifier of the agent that answers the query.
    agent_id: str = Field(
        ...,
        description="ID of the Contextual AI agent to query",
    )
    # When given, the tool checks document readiness for this datastore.
    datastore_id: Optional[str] = Field(
        default=None,
        description="Optional datastore ID for document readiness verification",
    )
|
||||
|
||||
|
||||
class ContextualAIQueryTool(BaseTool):
    """Tool to query Contextual AI RAG agents.

    The query is sent to an existing agent through the ``contextual-client``
    SDK. When a ``datastore_id`` is supplied, the tool first lists the
    datastore's documents over REST and waits while any of them is still
    reported as ``processing`` or ``pending``.
    """

    name: str = "Contextual AI Query Tool"
    description: str = "Use this tool to query a Contextual AI RAG agent with access to your documents"
    args_schema: Type[BaseModel] = ContextualAIQuerySchema

    # API key used for both the SDK client and the REST readiness check.
    api_key: str
    # SDK client instance; populated in __init__ from ``api_key``.
    contextual_client: Any = None
    package_dependencies: List[str] = ["contextual-client"]

    def __init__(self, **kwargs):
        """Validate the fields and create the Contextual AI SDK client.

        Raises:
            ImportError: If the ``contextual-client`` package is not installed.
        """
        super().__init__(**kwargs)
        # Keep the try body limited to the import so that only a missing
        # package is reported as a missing package.
        try:
            from contextual import ContextualAI
        except ImportError as exc:
            raise ImportError(
                "contextual-client package is required. Install it with: pip install contextual-client"
            ) from exc
        self.contextual_client = ContextualAI(api_key=self.api_key)

    def _check_documents_ready(self, datastore_id: str) -> bool:
        """Return whether no document in the datastore is still being processed.

        The check is best-effort: a non-200 response, or a request that fails
        or times out, is treated as "ready" so that the query is still
        attempted instead of the whole tool call crashing.

        Args:
            datastore_id: Datastore whose document list is inspected.

        Returns:
            ``False`` only when the API answered 200 and at least one document
            has status ``processing`` or ``pending``; ``True`` otherwise.
        """
        url = f"https://api.contextual.ai/v1/datastores/{datastore_id}/documents"
        headers = {"Authorization": f"Bearer {self.api_key}"}
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"Warning: Could not check document status: {exc}")
            return True

        if response.status_code == 200:
            data = response.json()
            documents = data.get('documents', [])
            return not any(doc.get('status') in ('processing', 'pending') for doc in documents)
        return True

    async def _wait_for_documents_async(self, datastore_id: str, max_attempts: int = 20, interval: float = 30.0) -> bool:
        """Poll the readiness check until it passes or attempts run out.

        The blocking HTTP check runs in a worker thread via
        ``asyncio.to_thread``. No sleep is performed after the final attempt,
        since nothing would be checked afterwards.

        Args:
            datastore_id: Datastore to poll.
            max_attempts: Maximum number of readiness checks.
            interval: Seconds to wait between two checks.

        Returns:
            Always ``True``; exhausting the attempts is deliberately not an
            error.
        """
        for attempt in range(max_attempts):
            ready = await asyncio.to_thread(self._check_documents_ready, datastore_id)
            if ready:
                return True
            if attempt < max_attempts - 1:
                print("Processing documents ...")
                await asyncio.sleep(interval)
        return True  # give up but don't fail hard

    def _run(self, query: str, agent_id: str, datastore_id: Optional[str] = None) -> str:
        """Query the agent and return the text of its answer.

        Args:
            query: Question to send to the agent.
            agent_id: ID of the agent to query.
            datastore_id: Optional datastore whose documents must finish
                processing before the query is sent.

        Returns:
            The answer content, or a string starting with
            ``"Error querying Contextual AI agent:"`` if the SDK call fails.

        Raises:
            ValueError: If ``agent_id`` is empty.
        """
        if not agent_id:
            raise ValueError("Agent ID is required to query the Contextual AI agent")

        if datastore_id:
            ready = self._check_documents_ready(datastore_id)
            if not ready:
                try:
                    loop = asyncio.get_running_loop()
                except RuntimeError:
                    # No running event loop in this thread.
                    loop = None

                if loop and loop.is_running():
                    # Already inside an event loop: asyncio.run() would raise,
                    # so re-enter the running loop through nest_asyncio.
                    try:
                        import nest_asyncio
                        nest_asyncio.apply(loop)
                        loop.run_until_complete(self._wait_for_documents_async(datastore_id))
                    except Exception as e:
                        print(f"Failed to apply nest_asyncio: {str(e)}")
                else:
                    asyncio.run(self._wait_for_documents_async(datastore_id))
        else:
            print("Warning: No datastore_id provided. Document status checking disabled.")

        try:
            response = self.contextual_client.agents.query.create(
                agent_id=agent_id,
                messages=[{"role": "user", "content": query}]
            )
            # The response shape is probed defensively: direct content, a
            # single message, or a list of messages, in that order.
            if hasattr(response, 'content'):
                return response.content
            elif hasattr(response, 'message'):
                return response.message.content if hasattr(response.message, 'content') else str(response.message)
            elif hasattr(response, 'messages') and len(response.messages) > 0:
                last_message = response.messages[-1]
                return last_message.content if hasattr(last_message, 'content') else str(last_message)
            else:
                return str(response)
        except Exception as e:
            return f"Error querying Contextual AI agent: {str(e)}"
|
||||
72
crewai_tools/tools/contextualai_rerank_tool/README.md
Normal file
72
crewai_tools/tools/contextualai_rerank_tool/README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# ContextualAIRerankTool
|
||||
|
||||
## Description
|
||||
This tool is designed to integrate Contextual AI's enterprise-grade instruction-following reranker with CrewAI, enabling you to intelligently reorder documents based on relevance and custom criteria. Use this tool to enhance search result quality and document retrieval for RAG systems using Contextual AI's reranking models that understand context and follow specific instructions for optimal document ordering.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]' contextual-client
|
||||
```
|
||||
|
||||
**Note**: You'll need a Contextual AI API key. Sign up at [app.contextual.ai](https://app.contextual.ai) to get your free API key.
|
||||
|
||||
## Example
|
||||
|
||||
```python
|
||||
from crewai_tools import ContextualAIRerankTool
|
||||
|
||||
tool = ContextualAIRerankTool(api_key="your_api_key_here")
|
||||
|
||||
result = tool._run(
|
||||
query="financial performance and revenue metrics",
|
||||
documents=[
|
||||
"Q1 report content with revenue data",
|
||||
"Q2 report content with growth metrics",
|
||||
"News article about market trends"
|
||||
],
|
||||
instruction="Prioritize documents with specific financial metrics and quantitative data"
|
||||
)
|
||||
print(result)
|
||||
```
|
||||
|
||||
The result will contain the document ranking. For example:
|
||||
```
|
||||
Rerank Result:
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"index": 1,
|
||||
"relevance_score": 0.88227631
|
||||
},
|
||||
{
|
||||
"index": 0,
|
||||
"relevance_score": 0.61159354
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"relevance_score": 0.28579462
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Parameters
|
||||
- `api_key`: Your Contextual AI API key
|
||||
- `query`: Search query for reranking
|
||||
- `documents`: List of document texts to rerank
|
||||
- `instruction`: Optional reranking instruction for custom criteria
|
||||
- `metadata`: Optional metadata for each document
|
||||
- `model`: Reranker model (default: "ctxl-rerank-en-v1-instruct")
|
||||
|
||||
## Key Features
|
||||
- **Instruction-Following Reranking**: Follows custom instructions for domain-specific document ordering
|
||||
- **Metadata Integration**: Incorporates document metadata for enhanced ranking decisions
|
||||
|
||||
## Use Cases
|
||||
- Improve search result relevance in document collections
|
||||
- Reorder documents by custom business criteria (recency, authority, relevance)
|
||||
- Filter and prioritize documents for research and analysis workflows
|
||||
|
||||
For more detailed information about Contextual AI's capabilities, visit the [official documentation](https://docs.contextual.ai).
|
||||
@@ -0,0 +1,68 @@
|
||||
from typing import Any, Optional, Type, List
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ContextualAIRerankSchema(BaseModel):
    """Arguments accepted by the Contextual AI rerank tool."""

    # Query the documents are ranked against.
    query: str = Field(
        ...,
        description="The search query to rerank documents against",
    )
    # Candidate document texts.
    documents: List[str] = Field(
        ...,
        description="List of document texts to rerank",
    )
    # Free-form guidance for the reranker; omitted when None.
    instruction: Optional[str] = Field(
        default=None,
        description="Optional instruction for reranking behavior",
    )
    # One metadata string per document; omitted when None.
    metadata: Optional[List[str]] = Field(
        default=None,
        description="Optional metadata for each document",
    )
    # Reranker model identifier.
    model: str = Field(
        default="ctxl-rerank-en-v1-instruct",
        description="Reranker model to use",
    )
|
||||
|
||||
|
||||
class ContextualAIRerankTool(BaseTool):
    """Tool to rerank documents using Contextual AI's instruction-following reranker."""

    name: str = "Contextual AI Document Reranker"
    description: str = "Rerank documents using Contextual AI's instruction-following reranker"
    args_schema: Type[BaseModel] = ContextualAIRerankSchema

    # API key sent as a bearer token with the rerank request.
    api_key: str
    package_dependencies: List[str] = ["contextual-client"]

    def _run(
        self,
        query: str,
        documents: List[str],
        instruction: Optional[str] = None,
        metadata: Optional[List[str]] = None,
        model: str = "ctxl-rerank-en-v1-instruct"
    ) -> str:
        """Rerank documents using Contextual AI's instruction-following reranker.

        Args:
            query: Search query to rank the documents against.
            documents: Document texts to rerank.
            instruction: Optional instruction; only sent when non-empty.
            metadata: Optional per-document metadata; must have the same
                length as ``documents`` when provided.
            model: Reranker model name.

        Returns:
            The API's JSON response pretty-printed with two-space indentation,
            or a string starting with ``"Failed to rerank documents:"`` when
            anything goes wrong (including a metadata length mismatch, a
            non-200 status, or a request timeout).
        """
        try:
            import requests
            import json

            base_url = "https://api.contextual.ai/v1"
            headers = {
                "accept": "application/json",
                "content-type": "application/json",
                "authorization": f"Bearer {self.api_key}"
            }

            payload = {
                "query": query,
                "documents": documents,
                "model": model
            }

            if instruction:
                payload["instruction"] = instruction

            if metadata:
                if len(metadata) != len(documents):
                    raise ValueError("Metadata list must have the same length as documents list")
                payload["metadata"] = metadata

            rerank_url = f"{base_url}/rerank"
            # A timeout keeps a stalled connection from blocking the agent
            # forever; a timeout error is reported through the except below.
            result = requests.post(rerank_url, json=payload, headers=headers, timeout=60)

            if result.status_code != 200:
                raise RuntimeError(f"Reranker API returned status {result.status_code}: {result.text}")

            return json.dumps(result.json(), indent=2)

        except Exception as e:
            return f"Failed to rerank documents: {str(e)}"
|
||||
62
crewai_tools/tools/couchbase_tool/README.md
Normal file
62
crewai_tools/tools/couchbase_tool/README.md
Normal file
@@ -0,0 +1,62 @@
|
||||
# CouchbaseFTSVectorSearchTool
|
||||
## Description
|
||||
Couchbase is a NoSQL database with vector search capabilities. Users can store and query vector embeddings. You can learn more about Couchbase vector search here: https://docs.couchbase.com/cloud/vector-search/vector-search.html
|
||||
|
||||
This tool is specifically crafted for performing semantic search using Couchbase. Use this tool to find semantically similar docs to a given query.
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package by executing the following command in your terminal:
|
||||
|
||||
```shell
|
||||
uv pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Setup
|
||||
Before instantiating the tool, you need a Couchbase cluster. Use either of the following options:
|
||||
- Create a cluster on [Couchbase Capella](https://docs.couchbase.com/cloud/get-started/create-account.html), Couchbase's cloud database solution.
|
||||
- Create a [local Couchbase server](https://docs.couchbase.com/server/current/getting-started/start-here.html).
|
||||
|
||||
You will need to create a bucket, scope and collection on the cluster. Then, [follow this guide](https://docs.couchbase.com/python-sdk/current/hello-world/start-using-sdk.html) to create a Couchbase Cluster object and load documents into your collection.
|
||||
|
||||
Follow the docs below to create a vector search index on Couchbase.
|
||||
- [Create a vector search index on Couchbase Capella.](https://docs.couchbase.com/cloud/vector-search/create-vector-search-index-ui.html)
|
||||
- [Create a vector search index on your local Couchbase server.](https://docs.couchbase.com/server/current/vector-search/create-vector-search-index-ui.html)
|
||||
|
||||
Ensure that the `Dimension` field in the index matches the embedding model. For example, OpenAI's `text-embedding-3-small` model produces 1536-dimensional embeddings, so the `Dimension` field in the index must be set to 1536.
|
||||
|
||||
## Example
|
||||
To utilize the CouchbaseFTSVectorSearchTool for different use cases, follow these examples:
|
||||
|
||||
```python
|
||||
from crewai_tools import CouchbaseFTSVectorSearchTool
|
||||
|
||||
# Instantiate a Couchbase Cluster object from the Couchbase SDK
|
||||
|
||||
tool = CouchbaseFTSVectorSearchTool(
|
||||
cluster=cluster,
|
||||
collection_name="collection",
|
||||
scope_name="scope",
|
||||
bucket_name="bucket",
|
||||
index_name="index",
|
||||
embedding_function=embed_fn
|
||||
)
|
||||
|
||||
# Adding the tool to an agent
|
||||
rag_agent = Agent(
|
||||
name="rag_agent",
|
||||
role="You are a helpful assistant that can answer questions with the help of the CouchbaseFTSVectorSearchTool.",
|
||||
llm="gpt-4o-mini",
|
||||
tools=[tool],
|
||||
)
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `cluster`: An initialized Couchbase `Cluster` instance.
|
||||
- `bucket_name`: The name of the Couchbase bucket.
|
||||
- `scope_name`: The name of the scope within the bucket.
|
||||
- `collection_name`: The name of the collection within the scope.
|
||||
- `index_name`: The name of the search index (vector index).
|
||||
- `embedding_function`: A function that takes a string and returns its embedding (list of floats).
|
||||
- `embedding_key`: Name of the field in the search index storing the vector. (Optional, defaults to 'embedding')
|
||||
- `scoped_index`: Whether the index is scoped (True) or cluster-level (False). (Optional, defaults to True)
|
||||
- `limit`: The maximum number of search results to return. (Optional, defaults to 3)
|
||||
241
crewai_tools/tools/couchbase_tool/couchbase_tool.py
Normal file
241
crewai_tools/tools/couchbase_tool/couchbase_tool.py
Normal file
@@ -0,0 +1,241 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Optional, Type, List, Dict, Callable
|
||||
|
||||
try:
|
||||
import couchbase.search as search
|
||||
from couchbase.cluster import Cluster
|
||||
from couchbase.options import SearchOptions
|
||||
from couchbase.vector_search import VectorQuery, VectorSearch
|
||||
|
||||
COUCHBASE_AVAILABLE = True
|
||||
except ImportError:
|
||||
COUCHBASE_AVAILABLE = False
|
||||
search = Any
|
||||
Cluster = Any
|
||||
SearchOptions = Any
|
||||
VectorQuery = Any
|
||||
VectorSearch = Any
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field, SkipValidation
|
||||
|
||||
|
||||
class CouchbaseToolSchema(BaseModel):
    """Arguments accepted by the Couchbase vector search tool."""

    # The only input: the text that is embedded and searched for.
    query: str = Field(
        default=...,
        description="The query to search retrieve relevant information from the Couchbase database. Pass only the query, not the question.",
    )
|
||||
|
||||
class CouchbaseFTSVectorSearchTool(BaseTool):
    """Tool to run a vector search against a Couchbase search index.

    The query text is embedded with ``embedding_function`` and searched for in
    ``index_name``, either on the configured scope or on the cluster depending
    on ``scoped_index``.
    """

    model_config = {"arbitrary_types_allowed": True}
    name: str = "CouchbaseFTSVectorSearchTool"
    description: str = "A tool to search the Couchbase database for relevant information on internal documents."
    args_schema: Type[BaseModel] = CouchbaseToolSchema
    cluster: SkipValidation[Optional[Cluster]] = None
    # NOTE: these defaults must not end with a trailing comma. ``= None,``
    # makes the default the truthy tuple ``(None,)``, which defeats the
    # "must be provided" checks in __init__.
    collection_name: Optional[str] = None
    scope_name: Optional[str] = None
    bucket_name: Optional[str] = None
    index_name: Optional[str] = None
    embedding_key: Optional[str] = Field(
        default="embedding",
        description="Name of the field in the search index that stores the vector"
    )
    scoped_index: Optional[bool] = Field(
        default=True,
        description="Specify whether the index is scoped. Is True by default."
    )
    limit: Optional[int] = Field(default=3)
    embedding_function: SkipValidation[Callable[[str], List[float]]] = Field(
        default=None,
        description="A function that takes a string and returns a list of floats. This is used to embed the query before searching the database."
    )

    def _check_bucket_exists(self) -> bool:
        """Check if the bucket exists in the linked Couchbase cluster.

        Returns:
            ``True`` if the bucket manager can fetch ``bucket_name``,
            ``False`` if that lookup raises any exception.
        """
        bucket_manager = self.cluster.buckets()
        try:
            bucket_manager.get_bucket(self.bucket_name)
            return True
        except Exception:
            return False

    def _check_scope_and_collection_exists(self) -> bool:
        """Check that the scope and collection exist in the linked bucket.

        Returns:
            ``True`` when both are found.

        Raises:
            ValueError: If the scope, or the collection within it, is missing.
        """
        # Map every scope name in the bucket to the names of its collections.
        scope_collection_map: Dict[str, Any] = {
            scope.name: [collection.name for collection in scope.collections]
            for scope in self._bucket.collections().get_all_scopes()
        }

        if self.scope_name not in scope_collection_map:
            raise ValueError(
                f"Scope {self.scope_name} not found in Couchbase "
                f"bucket {self.bucket_name}"
            )

        if self.collection_name not in scope_collection_map[self.scope_name]:
            raise ValueError(
                f"Collection {self.collection_name} not found in scope "
                f"{self.scope_name} in Couchbase bucket {self.bucket_name}"
            )

        return True

    def _check_index_exists(self) -> bool:
        """Check that the search index exists on the scope or on the cluster.

        Returns:
            ``True`` when the index is found.

        Raises:
            ValueError: If the index does not exist.
        """
        # Scoped indexes are listed on the scope, the others on the cluster.
        index_owner = self._scope if self.scoped_index else self.cluster
        all_indexes = [
            index.name for index in index_owner.search_indexes().get_all_indexes()
        ]
        if self.index_name not in all_indexes:
            raise ValueError(
                f"Index {self.index_name} does not exist. "
                " Please create the index before searching."
            )

        return True

    def __init__(self, **kwargs):
        """Initialize the CouchbaseFTSVectorSearchTool.

        Args:
            **kwargs: Keyword arguments to pass to the BaseTool constructor and
                to configure the Couchbase connection and search parameters.
                Requires 'cluster', 'bucket_name', 'scope_name',
                'collection_name', 'index_name', and 'embedding_function'.

        Raises:
            ValueError: If required parameters are missing, the Couchbase cluster
                cannot be reached, or the specified bucket, scope,
                collection, or index does not exist.
            ImportError: If the 'couchbase' package is missing and the user
                declines to install it.
        """
        super().__init__(**kwargs)

        if not COUCHBASE_AVAILABLE:
            import click

            if click.confirm(
                "The 'couchbase' package is required to use the CouchbaseFTSVectorSearchTool. "
                "Would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "couchbase"], check=True)
            else:
                raise ImportError(
                    "The 'couchbase' package is required to use the CouchbaseFTSVectorSearchTool. "
                    "Please install it with: uv add couchbase"
                )
            # NOTE(review): after installing, no connection is set up on this
            # instance; presumably the tool must be re-created - confirm.
            return

        # Validate before touching the cluster so that a missing parameter is
        # reported with its own message instead of a generic connection error.
        required_settings = (
            (self.cluster, "Cluster instance must be provided"),
            (self.bucket_name, "Bucket name must be provided"),
            (self.scope_name, "Scope name must be provided"),
            (self.collection_name, "Collection name must be provided"),
            (self.index_name, "Index name must be provided"),
            (self.embedding_function, "Embedding function must be provided"),
        )
        for value, message in required_settings:
            if not value:
                raise ValueError(message)

        try:
            self._bucket = self.cluster.bucket(self.bucket_name)
            self._scope = self._bucket.scope(self.scope_name)
            self._collection = self._scope.collection(self.collection_name)
        except Exception as e:
            raise ValueError(
                "Error connecting to couchbase. "
                "Please check the connection and credentials"
            ) from e

        # check if bucket exists
        if not self._check_bucket_exists():
            raise ValueError(
                f"Bucket {self.bucket_name} does not exist. "
                " Please create the bucket before searching."
            )

        self._check_scope_and_collection_exists()
        self._check_index_exists()

    def _run(self, query: str) -> str:
        """Execute a vector search query against the Couchbase index.

        Args:
            query: The search query string.

        Returns:
            A JSON string (two-space indentation) with the ``fields`` of every
            result row, or a string starting with
            ``"Search failed with error:"`` if the search raises.
        """
        # Embedding happens outside the try block, so an error raised by the
        # embedding function propagates to the caller.
        query_embedding = self.embedding_function(query)
        fields = ["*"]

        search_req = search.SearchRequest.create(
            VectorSearch.from_vector_query(
                VectorQuery(
                    self.embedding_key,
                    query_embedding,
                    self.limit
                )
            )
        )

        try:
            # Scoped indexes are searched on the scope, the others on the cluster.
            search_target = self._scope if self.scoped_index else self.cluster
            search_iter = search_target.search(
                self.index_name,
                search_req,
                SearchOptions(
                    limit=self.limit,
                    fields=fields,
                )
            )

            json_response = [row.fields for row in search_iter.rows()]
        except Exception as e:
            return f"Search failed with error: {e}"

        return json.dumps(json_response, indent=2)
|
||||
@@ -0,0 +1,88 @@
|
||||
"""
|
||||
Crewai Enterprise Tools
|
||||
"""
|
||||
|
||||
import os
|
||||
import typing as t
|
||||
import logging
|
||||
import json
|
||||
from crewai.tools import BaseTool
|
||||
from crewai_tools.adapters.enterprise_adapter import EnterpriseActionKitToolAdapter
|
||||
from crewai_tools.adapters.tool_collection import ToolCollection
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def CrewaiEnterpriseTools(
    enterprise_token: t.Optional[str] = None,
    actions_list: t.Optional[t.List[str]] = None,
    enterprise_action_kit_project_id: t.Optional[str] = None,
    enterprise_action_kit_project_url: t.Optional[str] = None,
) -> ToolCollection[BaseTool]:
    """Build the collection of CrewAI enterprise tools (deprecated factory).

    A ``DeprecationWarning`` is emitted on every call.

    Args:
        enterprise_token: Token for the enterprise actions. When it is ``None``
            or empty, the CREWAI_ENTERPRISE_TOOLS_TOKEN environment variable is
            used instead; a warning is logged if that is missing too.
        actions_list: Optional tool names used to filter the returned tools.
        enterprise_action_kit_project_id: Optional Enterprise Action Kit
            project ID; forwarded to the adapter only when not ``None``.
        enterprise_action_kit_project_url: Optional Enterprise Action Kit
            project URL; forwarded to the adapter only when not ``None``.

    Returns:
        A ToolCollection of the adapter's tools, filtered by name.
    """
    import warnings

    warnings.warn(
        "CrewaiEnterpriseTools will be removed in v1.0.0. Considering use `Agent(apps=[...])` instead.",
        DeprecationWarning,
        stacklevel=2
    )

    token = enterprise_token
    if token is None or token == "":
        token = os.environ.get("CREWAI_ENTERPRISE_TOOLS_TOKEN")
        if not token:
            logger.warning("No enterprise token provided")

    # Optional adapter settings are forwarded only when explicitly given.
    optional_settings = {
        "enterprise_action_kit_project_id": enterprise_action_kit_project_id,
        "enterprise_action_kit_project_url": enterprise_action_kit_project_url,
    }
    adapter_options = {"enterprise_action_token": token}
    adapter_options.update(
        {key: value for key, value in optional_settings.items() if value is not None}
    )

    available_tools = EnterpriseActionKitToolAdapter(**adapter_options).tools()
    wanted_names = _parse_actions_list(actions_list)

    return ToolCollection(available_tools).filter_by_names(wanted_names)
|
||||
|
||||
|
||||
# ENTERPRISE INJECTION ONLY
|
||||
def _parse_actions_list(actions_list: t.Optional[t.List[str]]) -> t.Optional[t.List[str]]:
    """Resolve the list of tool names to keep.

    An explicitly passed list always wins. Otherwise the
    CREWAI_ENTERPRISE_TOOLS_ACTIONS_LIST environment variable is read and
    decoded as JSON; it must hold a JSON array of strings.

    Args:
        actions_list: Tool names given by the caller, or ``None``.

    Returns:
        The list of tool names, or ``None`` when no usable list is available
        (argument is ``None`` and the variable is unset, is not valid JSON, or
        is not a JSON array of strings).
    """
    if actions_list is not None:
        return actions_list

    actions_list_from_env = os.environ.get("CREWAI_ENTERPRISE_TOOLS_ACTIONS_LIST")
    if actions_list_from_env is None:
        return None

    try:
        parsed = json.loads(actions_list_from_env)
    except json.JSONDecodeError:
        logger.warning(f"Failed to parse actions_list as JSON: {actions_list_from_env}")
        return None

    # Valid JSON is not necessarily a list of names: a bare string or an
    # object would otherwise be handed to the name filter as-is.
    if not isinstance(parsed, list) or not all(isinstance(item, str) for item in parsed):
        logger.warning(f"actions_list must be a JSON array of strings: {actions_list_from_env}")
        return None

    return parsed
|
||||
16
crewai_tools/tools/crewai_platform_tools/__init__.py
Normal file
16
crewai_tools/tools/crewai_platform_tools/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""CrewAI Platform Tools
|
||||
|
||||
This module provides tools for integrating with various platform applications
|
||||
through the CrewAI platform API.
|
||||
"""
|
||||
|
||||
from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import CrewaiPlatformTools
|
||||
from crewai_tools.tools.crewai_platform_tools.crewai_platform_action_tool import CrewAIPlatformActionTool
|
||||
from crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder import CrewaiPlatformToolBuilder
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CrewaiPlatformTools",
|
||||
"CrewAIPlatformActionTool",
|
||||
"CrewaiPlatformToolBuilder",
|
||||
]
|
||||
@@ -0,0 +1,233 @@
|
||||
"""
|
||||
Crewai Enterprise Tools
|
||||
"""
|
||||
import re
|
||||
import json
|
||||
import requests
|
||||
from typing import Dict, Any, List, Type, Optional, Union, get_origin, cast, Literal
|
||||
from pydantic import Field, create_model
|
||||
from crewai.tools import BaseTool
|
||||
from crewai_tools.tools.crewai_platform_tools.misc import get_platform_api_base_url, get_platform_integration_token
|
||||
|
||||
|
||||
class CrewAIPlatformActionTool(BaseTool):
|
||||
action_name: str = Field(default="", description="The name of the action")
|
||||
action_schema: Dict[str, Any] = Field(
|
||||
default_factory=dict, description="The schema of the action"
|
||||
)
|
||||
|
||||
def __init__(
    self,
    description: str,
    action_name: str,
    action_schema: Dict[str, Any],
):
    """Build the argument model from the action schema and initialize the tool.

    Every property found in the action schema becomes one field of a
    dynamically created ``<BaseName>Schema`` model. A property whose type
    cannot be resolved is typed as ``str``. When the schema has no
    properties, or the model cannot be created, a model with a single
    ``input_text`` string field is used instead.

    Args:
        description: Tool description passed to the base class.
        action_name: Action name; lower-cased with spaces replaced by
            underscores to form the tool name.
        action_schema: Schema dictionary describing the action's parameters.
    """
    # Helper state used by the type-resolution methods called below.
    self._model_registry = {}
    self._base_name = self._sanitize_name(action_name)

    properties, required_names = self._extract_schema_info(action_schema)
    schema_model_name = f"{self._base_name}Schema"

    fields = {}
    for key, details in properties.items():
        summary = details.get("description", "")
        must_provide = key in required_names

        try:
            annotation = self._process_schema_type(
                details, self._sanitize_name(key).title()
            )
        except Exception:
            # Fall back to a plain string for anything that cannot be typed.
            annotation = str

        fields[key] = self._create_field_definition(
            annotation, must_provide, summary
        )

    args_schema = None
    if fields:
        try:
            args_schema = create_model(schema_model_name, **fields)
        except Exception as error:
            print(f"Warning: Could not create main schema model: {error}")

    if args_schema is None:
        # No usable fields: accept one free-form text input.
        args_schema = create_model(
            schema_model_name,
            input_text=(str, Field(description="Input for the action")),
        )

    super().__init__(
        name=action_name.lower().replace(" ", "_"),
        description=description,
        args_schema=args_schema,
    )
    self.action_name = action_name
    self.action_schema = action_schema
|
||||
|
||||
def _sanitize_name(self, name: str) -> str:
    """Turn an arbitrary name into a CamelCase-style identifier.

    The name is lower-cased, spaces become underscores, every character
    other than ASCII letters, digits and underscores is dropped, and the
    underscore-separated parts are capitalized and concatenated.
    """
    underscored = name.lower().replace(" ", "_")
    cleaned = re.sub(r"[^a-zA-Z0-9_]", "", underscored)
    # filter(None, ...) skips the empty pieces produced by repeated,
    # leading or trailing underscores.
    return "".join(map(str.capitalize, filter(None, cleaned.split("_"))))
|
||||
|
||||
def _extract_schema_info(
    self, action_schema: Dict[str, Any]
) -> tuple[Dict[str, Any], List[str]]:
    """Return the parameter properties and required names of an action schema.

    Both values are read from ``action_schema["function"]["parameters"]``;
    any missing level yields an empty dict of properties and an empty list
    of required names.
    """
    parameters = action_schema.get("function", {}).get("parameters", {})
    return parameters.get("properties", {}), parameters.get("required", [])
|
||||
|
||||
def _process_schema_type(self, schema: Dict[str, Any], type_name: str) -> Type[Any]:
    """Translate a JSON-schema fragment into a Python type annotation.

    Resolution order:

    * ``anyOf``: the first non-null variant is resolved and wrapped in
      ``Optional`` if a null variant is present; with only null variants
      the result is ``Optional[str]``.
    * ``oneOf`` / ``allOf``: only the first entry is resolved.
    * ``enum``: a ``Literal`` of the values, or the mapped base type when
      the enum is empty.
    * ``array``: ``List`` of the resolved item type (items default to string).
    * ``object``: a nested model named after ``type_name``.
    * anything else: the mapped base type (``type`` defaults to string).
    """
    if "anyOf" in schema:
        variants = schema["anyOf"]
        allows_null = any(variant.get("type") == "null" for variant in variants)
        concrete = [variant for variant in variants if variant.get("type") != "null"]

        if not concrete:
            return cast(Type[Any], Optional[str])

        resolved = self._process_schema_type(concrete[0], type_name)
        if allows_null:
            return Optional[resolved]
        return resolved

    # For both combinators only the first alternative is considered.
    for combinator in ("oneOf", "allOf"):
        if combinator in schema:
            return self._process_schema_type(schema[combinator][0], type_name)

    json_type = schema.get("type", "string")

    if "enum" in schema:
        choices = schema["enum"]
        if choices:
            return Literal[tuple(choices)]
        return self._map_json_type_to_python(json_type)

    if json_type == "array":
        element_schema = schema.get("items", {"type": "string"})
        element_type = self._process_schema_type(element_schema, f"{type_name}Item")
        return List[element_type]

    if json_type == "object":
        return self._create_nested_model(schema, type_name)

    return self._map_json_type_to_python(json_type)
|
||||
|
||||
def _create_nested_model(self, schema: Dict[str, Any], model_name: str) -> Type[Any]:
|
||||
full_model_name = f"{self._base_name}{model_name}"
|
||||
|
||||
if full_model_name in self._model_registry:
|
||||
return self._model_registry[full_model_name]
|
||||
|
||||
properties = schema.get("properties", {})
|
||||
required_fields = schema.get("required", [])
|
||||
|
||||
if not properties:
|
||||
return dict
|
||||
|
||||
field_definitions = {}
|
||||
for prop_name, prop_schema in properties.items():
|
||||
prop_desc = prop_schema.get("description", "")
|
||||
is_required = prop_name in required_fields
|
||||
|
||||
try:
|
||||
prop_type = self._process_schema_type(
|
||||
prop_schema, f"{model_name}{self._sanitize_name(prop_name).title()}"
|
||||
)
|
||||
except Exception as e:
|
||||
prop_type = str
|
||||
|
||||
field_definitions[prop_name] = self._create_field_definition(
|
||||
prop_type, is_required, prop_desc
|
||||
)
|
||||
|
||||
try:
|
||||
nested_model = create_model(full_model_name, **field_definitions)
|
||||
self._model_registry[full_model_name] = nested_model
|
||||
return nested_model
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not create nested model {full_model_name}: {e}")
|
||||
return dict
|
||||
|
||||
def _create_field_definition(
    self, field_type: Type[Any], is_required: bool, description: str
) -> tuple:
    """Build the ``(annotation, Field(...))`` pair for one model field.

    Required fields keep ``field_type`` and get no default. Optional fields
    default to ``None`` and are wrapped in ``Optional[...]`` unless the type
    is already a ``typing.Union``.
    """
    if is_required:
        return (field_type, Field(description=description))

    already_union = get_origin(field_type) is Union
    annotation = field_type if already_union else Optional[field_type]
    return (annotation, Field(default=None, description=description))
|
||||
|
||||
def _map_json_type_to_python(self, json_type: str) -> Type[Any]:
|
||||
type_mapping = {
|
||||
"string": str,
|
||||
"integer": int,
|
||||
"number": float,
|
||||
"boolean": bool,
|
||||
"array": list,
|
||||
"object": dict,
|
||||
"null": type(None),
|
||||
}
|
||||
return type_mapping.get(json_type, str)
|
||||
|
||||
def _get_required_nullable_fields(self) -> List[str]:
|
||||
schema_props, required = self._extract_schema_info(self.action_schema)
|
||||
|
||||
required_nullable_fields = []
|
||||
for param_name in required:
|
||||
param_details = schema_props.get(param_name, {})
|
||||
if self._is_nullable_type(param_details):
|
||||
required_nullable_fields.append(param_name)
|
||||
|
||||
return required_nullable_fields
|
||||
|
||||
def _is_nullable_type(self, schema: Dict[str, Any]) -> bool:
|
||||
if "anyOf" in schema:
|
||||
return any(t.get("type") == "null" for t in schema["anyOf"])
|
||||
return schema.get("type") == "null"
|
||||
|
||||
def _run(self, **kwargs) -> str:
    """Execute the platform action and return the response as text.

    Arguments whose value is ``None`` are left out of the request, except
    for parameters the action schema marks as both required and nullable,
    which are sent as explicit nulls.

    Returns:
        The pretty-printed JSON response on success, or a human-readable
        error string. This method does not raise; every failure is reported
        through the returned string.
    """
    try:
        payload = {key: value for key, value in kwargs.items() if value is not None}

        # Required-but-nullable parameters must be present even when unset.
        for field_name in self._get_required_nullable_fields():
            payload.setdefault(field_name, None)

        api_url = f"{get_platform_api_base_url()}/actions/{self.action_name}/execute"
        headers = {
            "Authorization": f"Bearer {get_platform_integration_token()}",
            "Content-Type": "application/json",
        }

        response = requests.post(
            url=api_url, headers=headers, json=payload, timeout=60
        )

        if response.ok:
            return json.dumps(response.json(), indent=2)

        # Error responses are not guaranteed to be JSON (a gateway may return
        # an HTML page), so parse defensively and keep the HTTP status.
        try:
            data = response.json()
        except ValueError:
            data = None

        error = data.get("error") if isinstance(data, dict) else None
        if isinstance(error, dict) and "message" in error:
            error_message = error["message"]
        elif data is not None:
            error_message = json.dumps(data)
        else:
            error_message = f"HTTP {response.status_code}: {response.text}"
        return f"API request failed: {error_message}"

    except Exception as e:
        return f"Error executing action {self.action_name}: {str(e)}"
|
||||
@@ -0,0 +1,135 @@
|
||||
|
||||
import requests
|
||||
from typing import List, Any, Dict
|
||||
from crewai.tools import BaseTool
|
||||
from crewai_tools.tools.crewai_platform_tools.misc import get_platform_api_base_url, get_platform_integration_token
|
||||
from crewai_tools.tools.crewai_platform_tools.crewai_platform_action_tool import CrewAIPlatformActionTool
|
||||
|
||||
|
||||
class CrewaiPlatformToolBuilder:
    """Fetch platform action definitions and turn them into tools.

    The action list for the requested apps is fetched on the first call to
    ``tools()`` and the resulting tools are cached. A failed fetch is cached
    as an empty tool list, so it is not retried on later calls.
    """

    def __init__(
        self,
        apps: list[str],
    ):
        """Store the app names; nothing is fetched until ``tools()`` is called."""
        self._apps = apps
        self._actions_schema = {}
        self._tools = None

    def tools(self) -> list[BaseTool]:
        """Return the platform tools, building them on first use."""
        if self._tools is None:
            self._fetch_actions()
            self._create_tools()
        return self._tools if self._tools is not None else []

    def _fetch_actions(self):
        """Populate ``self._actions_schema`` from the platform actions endpoint.

        Request and decoding failures are logged and leave the schema
        untouched (best effort). A missing integration token still raises,
        because the token is read before the guarded request.
        """
        import logging

        actions_url = f"{get_platform_api_base_url()}/actions"
        headers = {"Authorization": f"Bearer {get_platform_integration_token()}"}

        try:
            response = requests.get(
                actions_url, headers=headers, timeout=30, params={"apps": ",".join(self._apps)}
            )
            response.raise_for_status()
            # Decoding belongs to the best-effort section: an invalid body
            # must not crash tools().
            raw_data = response.json()
        except Exception as e:
            logging.getLogger(__name__).warning(
                "Could not fetch platform actions from %s: %s", actions_url, e
            )
            return

        self._actions_schema = {}
        action_categories = raw_data.get("actions", {}) if isinstance(raw_data, dict) else {}
        if not isinstance(action_categories, dict):
            return

        for app, action_list in action_categories.items():
            if not isinstance(action_list, list):
                continue
            for action in action_list:
                if not isinstance(action, dict):
                    continue
                if action_name := action.get("name"):
                    self._actions_schema[action_name] = {
                        "function": {
                            "name": action_name,
                            "description": action.get("description", f"Execute {action_name}"),
                            "parameters": action.get("parameters", {}),
                            "app": app,
                        }
                    }

    def _generate_detailed_description(
        self, schema: Dict[str, Any], indent: int = 0
    ) -> List[str]:
        """Describe an object schema as a list of indented text lines.

        Only schemas of type ``object`` with properties produce output.
        Nested objects, arrays of objects and enums are described
        recursively or inline beneath their property line.
        """
        descriptions = []
        indent_str = " " * indent

        if schema.get("type", "string") != "object":
            return descriptions

        properties = schema.get("properties", {})
        required_fields = schema.get("required", [])
        if not properties:
            return descriptions

        descriptions.append(f"{indent_str}Object with properties:")
        for prop_name, prop_schema in properties.items():
            prop_desc = prop_schema.get("description", "")
            req_str = " (required)" if prop_name in required_fields else " (optional)"
            descriptions.append(
                f"{indent_str} - {prop_name}: {prop_desc}{req_str}"
            )

            prop_type = prop_schema.get("type")
            if prop_type == "object":
                descriptions.extend(
                    self._generate_detailed_description(prop_schema, indent + 2)
                )
            elif prop_type == "array":
                items_schema = prop_schema.get("items", {})
                if items_schema.get("type") == "object":
                    descriptions.append(f"{indent_str} Array of objects:")
                    descriptions.extend(
                        self._generate_detailed_description(items_schema, indent + 3)
                    )
                elif "enum" in items_schema:
                    descriptions.append(
                        f"{indent_str} Array of enum values: {items_schema['enum']}"
                    )
            elif "enum" in prop_schema:
                descriptions.append(
                    f"{indent_str} Enum values: {prop_schema['enum']}"
                )

        return descriptions

    def _create_tools(self):
        """Create one ``CrewAIPlatformActionTool`` per fetched action."""
        tools = []

        for action_name, action_schema in self._actions_schema.items():
            function_details = action_schema.get("function", {})
            description = function_details.get("description", f"Execute {action_name}")

            parameters = function_details.get("parameters", {})
            param_descriptions = []

            if parameters.get("properties"):
                # The leading newline separates this section from the summary.
                param_descriptions.append("\nDetailed Parameter Structure:")
                param_descriptions.extend(
                    self._generate_detailed_description(parameters)
                )

            full_description = description + "\n".join(param_descriptions)

            tools.append(
                CrewAIPlatformActionTool(
                    description=full_description,
                    action_name=action_name,
                    action_schema=action_schema,
                )
            )

        self._tools = tools

    def __enter__(self):
        """Return the tool list so the builder can be used in a ``with`` block."""
        return self.tools()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Nothing to release; exceptions propagate."""
        pass
|
||||
@@ -0,0 +1,28 @@
|
||||
import re
|
||||
import os
|
||||
import typing as t
|
||||
from typing import Literal
|
||||
import logging
|
||||
import json
|
||||
from crewai.tools import BaseTool
|
||||
from crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder import CrewaiPlatformToolBuilder
|
||||
from crewai_tools.adapters.tool_collection import ToolCollection
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
def CrewaiPlatformTools(
    apps: list[str],
) -> ToolCollection[BaseTool]:
    """Create the CrewAI platform tools for the given apps.

    Args:
        apps: Names of the platform apps whose actions should be exposed
            as tools.

    Returns:
        Whatever ``CrewaiPlatformToolBuilder(apps=apps).tools()`` returns,
        i.e. one tool per platform action.
    """
    # NOTE(review): the builder's tools() is annotated as returning a plain
    # list, not a ToolCollection - confirm which one callers rely on.
    return CrewaiPlatformToolBuilder(apps=apps).tools()
|
||||
13
crewai_tools/tools/crewai_platform_tools/misc.py
Normal file
13
crewai_tools/tools/crewai_platform_tools/misc.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import os
|
||||
|
||||
def get_platform_api_base_url() -> str:
    """Return the base URL of the platform integrations API.

    The host part comes from the ``CREWAI_PLUS_URL`` environment variable
    and defaults to ``https://app.crewai.com``. Trailing slashes on the
    configured value are dropped so the result never contains ``//`` before
    the API path.
    """
    base_url = os.getenv("CREWAI_PLUS_URL", "https://app.crewai.com").rstrip("/")
    return f"{base_url}/crewai_plus/api/v1/integrations"
|
||||
|
||||
def get_platform_integration_token() -> str:
    """Return the platform integration token from the environment.

    Raises:
        ValueError: If ``CREWAI_PLATFORM_INTEGRATION_TOKEN`` is unset or empty.
    """
    token = os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN")
    if not token:
        raise ValueError("No platform integration token found, please set the CREWAI_PLATFORM_INTEGRATION_TOKEN environment variable")
    return token  # TODO: Use context manager to get token
|
||||
59
crewai_tools/tools/csv_search_tool/README.md
Normal file
59
crewai_tools/tools/csv_search_tool/README.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# CSVSearchTool
|
||||
|
||||
## Description
|
||||
|
||||
This tool is used to perform a RAG (Retrieval-Augmented Generation) search within a CSV file's content. It allows users to semantically search for queries in the content of a specified CSV file. This feature is particularly useful for extracting information from large CSV datasets where traditional search methods might be inefficient. All tools with "Search" in their name, including CSVSearchTool, are RAG tools designed for searching different sources of data.
|
||||
|
||||
## Installation
|
||||
|
||||
Install the crewai_tools package
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
```python
|
||||
from crewai_tools import CSVSearchTool
|
||||
|
||||
# Initialize the tool with a specific CSV file. This setup allows the agent to only search the given CSV file.
|
||||
tool = CSVSearchTool(csv='path/to/your/csvfile.csv')
|
||||
|
||||
# OR
|
||||
|
||||
# Initialize the tool without a specific CSV file. Agent will need to provide the CSV path at runtime.
|
||||
tool = CSVSearchTool()
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `csv` : The path to the CSV file you want to search. This is a mandatory argument if the tool was initialized without a specific CSV file; otherwise, it is optional.
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = CSVSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=True,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
56
crewai_tools/tools/csv_search_tool/csv_search_tool.py
Normal file
56
crewai_tools/tools/csv_search_tool/csv_search_tool.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from typing import Optional, Type
|
||||
|
||||
try:
|
||||
from embedchain.models.data_type import DataType
|
||||
EMBEDCHAIN_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDCHAIN_AVAILABLE = False
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
class FixedCSVSearchToolSchema(BaseModel):
    """Input for CSVSearchTool."""

    # Used once the tool is bound to a CSV file: only the query is needed.
    search_query: str = Field(
        description="Mandatory search query you want to use to search the CSV's content",
    )
|
||||
|
||||
|
||||
class CSVSearchToolSchema(FixedCSVSearchToolSchema):
    """Input for CSVSearchTool."""

    # Extends the fixed schema with the CSV location for tools that were
    # created without a bound file.
    csv: str = Field(
        description="File path or URL of a CSV file to be searched",
    )
|
||||
|
||||
|
||||
class CSVSearchTool(RagTool):
    # RAG tool that searches the content of a CSV file. When constructed with
    # a csv path the tool is bound to that file and only takes a query;
    # otherwise the csv path is supplied with each call.
    name: str = "Search a CSV's content"
    description: str = (
        "A tool that can be used to semantic search a query from a CSV's content."
    )
    args_schema: Type[BaseModel] = CSVSearchToolSchema

    def __init__(self, csv: Optional[str] = None, **kwargs):
        """Create the tool, optionally binding it to one CSV file."""
        super().__init__(**kwargs)
        if csv is None:
            return
        # Bound mode: ingest the file now and drop `csv` from the arguments.
        self.add(csv)
        self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content."
        self.args_schema = FixedCSVSearchToolSchema
        self._generate_description()

    def add(self, csv: str) -> None:
        """Ingest a CSV file; requires the optional embedchain dependency."""
        if EMBEDCHAIN_AVAILABLE:
            super().add(csv, data_type=DataType.CSV)
            return
        raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")

    def _run(
        self,
        search_query: str,
        csv: Optional[str] = None,
    ) -> str:
        """Search for ``search_query``, first ingesting ``csv`` when one is given."""
        if csv is not None:
            self.add(csv)
        return super()._run(query=search_query)
|
||||
41
crewai_tools/tools/dalle_tool/README.MD
Normal file
41
crewai_tools/tools/dalle_tool/README.MD
Normal file
@@ -0,0 +1,41 @@
|
||||
# DALL-E Tool
|
||||
|
||||
## Description
|
||||
This tool is used to give the Agent the ability to generate images using the DALL-E model. It is a transformer-based model that generates images from textual descriptions. This tool allows the Agent to generate images based on the text input provided by the user.
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Remember that when using this tool, the text must be generated by the Agent itself. The text must be a description of the image you want to generate.
|
||||
|
||||
```python
|
||||
from crewai_tools import DallETool
|
||||
|
||||
Agent(
|
||||
...
|
||||
tools=[DallETool()],
|
||||
)
|
||||
```
|
||||
|
||||
If needed you can also tweak the parameters of the DALL-E model by passing them as arguments to the `DallETool` class. For example:
|
||||
|
||||
```python
|
||||
from crewai_tools import DallETool
|
||||
|
||||
dalle_tool = DallETool(model="dall-e-3",
|
||||
size="1024x1024",
|
||||
quality="standard",
|
||||
n=1)
|
||||
|
||||
Agent(
|
||||
...
|
||||
tools=[dalle_tool]
|
||||
)
|
||||
```
|
||||
|
||||
The parameters are based on the `client.images.generate` method from the OpenAI API. For more information on the parameters, please refer to the [OpenAI API documentation](https://platform.openai.com/docs/guides/images/introduction?lang=python).
|
||||
52
crewai_tools/tools/dalle_tool/dalle_tool.py
Normal file
52
crewai_tools/tools/dalle_tool/dalle_tool.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import json
|
||||
from typing import List, Type
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ImagePromptSchema(BaseModel):
    """Input for Dall-E Tool."""

    # The only argument: the text prompt the image is generated from.
    image_description: str = Field(
        ...,
        description="Description of the image to be generated by Dall-E.",
    )
|
||||
|
||||
|
||||
class DallETool(BaseTool):
    # Tool that asks OpenAI's image API for an image and returns its URL
    # together with the prompt as revised by the API.
    name: str = "Dall-E Tool"
    description: str = "Generates images using OpenAI's Dall-E model."
    args_schema: Type[BaseModel] = ImagePromptSchema

    # Passed straight through to client.images.generate().
    model: str = "dall-e-3"
    size: str = "1024x1024"
    quality: str = "standard"
    n: int = 1

    env_vars: List[EnvVar] = [
        EnvVar(name="OPENAI_API_KEY", description="API key for OpenAI services", required=True),
    ]

    def _run(self, **kwargs) -> str:
        """Generate an image from ``image_description``.

        Returns:
            A JSON string with ``image_url`` and ``image_description`` (the
            revised prompt) of the first generated image, or a plain message
            when no description was supplied.
        """
        # The client is created before the argument check, as in the
        # original, so client construction errors surface first.
        client = OpenAI()

        prompt = kwargs.get("image_description")
        if not prompt:
            return "Image description is required."

        generation = client.images.generate(
            model=self.model,
            prompt=prompt,
            size=self.size,
            quality=self.quality,
            n=self.n,
        )

        # Only the first image is reported, whatever `n` is.
        first_image = generation.data[0]
        return json.dumps(
            {
                "image_url": first_image.url,
                "image_description": first_image.revised_prompt,
            }
        )
|
||||
66
crewai_tools/tools/databricks_query_tool/README.md
Normal file
66
crewai_tools/tools/databricks_query_tool/README.md
Normal file
@@ -0,0 +1,66 @@
|
||||
# Databricks Query Tool
|
||||
|
||||
## Description
|
||||
|
||||
This tool allows AI agents to execute SQL queries against Databricks workspace tables and retrieve the results. It provides a simple interface for querying data from Databricks tables using SQL, making it easy for agents to access and analyze data stored in Databricks.
|
||||
|
||||
## Installation
|
||||
|
||||
Install the crewai_tools package together with the Databricks SDK:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]' 'databricks-sdk'
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
The tool requires Databricks authentication credentials. You can provide these in two ways:
|
||||
|
||||
1. **Using Databricks CLI profile**:
|
||||
- Set the `DATABRICKS_CONFIG_PROFILE` environment variable to your profile name.
|
||||
|
||||
2. **Using direct credentials**:
|
||||
- Set both `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables.
|
||||
|
||||
Example:
|
||||
```shell
|
||||
export DATABRICKS_HOST="https://your-workspace.cloud.databricks.com"
|
||||
export DATABRICKS_TOKEN="dapi1234567890abcdef"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from crewai_tools import DatabricksQueryTool
|
||||
|
||||
# Basic usage
|
||||
databricks_tool = DatabricksQueryTool()
|
||||
|
||||
# With default parameters for catalog, schema, and warehouse
|
||||
databricks_tool = DatabricksQueryTool(
|
||||
default_catalog="my_catalog",
|
||||
default_schema="my_schema",
|
||||
default_warehouse_id="warehouse_id"
|
||||
)
|
||||
|
||||
# Example in a CrewAI agent
|
||||
@agent
|
||||
def data_analyst(self) -> Agent:
|
||||
return Agent(
|
||||
config=self.agents_config["data_analyst"],
|
||||
allow_delegation=False,
|
||||
tools=[databricks_tool]
|
||||
)
|
||||
```
|
||||
|
||||
## Parameters
|
||||
|
||||
When executing queries, you can provide the following parameters:
|
||||
|
||||
- `query` (required): SQL query to execute against the Databricks workspace
|
||||
- `catalog` (optional): Databricks catalog name
|
||||
- `db_schema` (optional): Databricks schema name
|
||||
- `warehouse_id` (optional): Databricks SQL warehouse ID
|
||||
- `row_limit` (optional): Maximum number of rows to return (default: 1000)
|
||||
|
||||
If not provided, the tool will use the default values set during initialization.
|
||||
@@ -0,0 +1,670 @@
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from databricks.sdk import WorkspaceClient
|
||||
|
||||
class DatabricksQueryToolSchema(BaseModel):
    """Input schema for DatabricksQueryTool."""

    query: str = Field(
        ..., description="SQL query to execute against the Databricks workspace table"
    )
    catalog: Optional[str] = Field(
        None, description="Databricks catalog name (optional, defaults to configured catalog)"
    )
    db_schema: Optional[str] = Field(
        None, description="Databricks schema name (optional, defaults to configured schema)"
    )
    warehouse_id: Optional[str] = Field(
        None, description="Databricks SQL warehouse ID (optional, defaults to configured warehouse)"
    )
    row_limit: Optional[int] = Field(
        1000, description="Maximum number of rows to return (default: 1000)"
    )

    @model_validator(mode='after')
    def validate_input(self) -> 'DatabricksQueryToolSchema':
        """Reject empty queries and cap row-returning queries at ``row_limit``.

        A ``LIMIT`` clause is appended only when all of the following hold:
        ``row_limit`` is a positive number, the statement starts with
        ``SELECT``, ``WITH``, ``VALUES`` or ``TABLE`` (optionally inside
        parentheses), and it does not already contain the keyword ``LIMIT``.
        Other statements (``SHOW``, ``DESCRIBE``, DDL, ``INSERT ... SELECT``)
        are left untouched, because a trailing ``LIMIT`` would either be a
        syntax error or silently truncate the data they write.

        Raises:
            ValueError: If the query is empty or whitespace only.
        """
        import re

        if not self.query or not self.query.strip():
            raise ValueError("Query cannot be empty")

        if self.row_limit and self.row_limit > 0:
            # Drop trailing semicolons and whitespace in any combination so
            # the clause is never appended after a statement terminator.
            statement = self.query.strip().rstrip("; \t\r\n")
            returns_rows = re.match(
                r"[\s(]*(select|with|values|table)\b", statement, re.IGNORECASE
            )
            # Match LIMIT as a whole word so identifiers such as
            # `credit_limit` do not count as an existing clause.
            has_limit = re.search(r"\blimit\b", statement, re.IGNORECASE)
            if returns_rows and not has_limit:
                self.query = f"{statement} LIMIT {self.row_limit};"

        return self
|
||||
|
||||
|
||||
class DatabricksQueryTool(BaseTool):
|
||||
"""
|
||||
A tool for querying Databricks workspace tables using SQL.
|
||||
|
||||
This tool executes SQL queries against Databricks tables and returns the results.
|
||||
It requires Databricks authentication credentials to be set as environment variables.
|
||||
|
||||
Authentication can be provided via:
|
||||
- Databricks CLI profile: Set DATABRICKS_CONFIG_PROFILE environment variable
|
||||
- Direct credentials: Set DATABRICKS_HOST and DATABRICKS_TOKEN environment variables
|
||||
|
||||
Example:
|
||||
>>> tool = DatabricksQueryTool()
|
||||
>>> results = tool.run(query="SELECT * FROM my_table LIMIT 10")
|
||||
"""
|
||||
|
||||
name: str = "Databricks SQL Query"
|
||||
description: str = (
|
||||
"Execute SQL queries against Databricks workspace tables and return the results."
|
||||
" Provide a 'query' parameter with the SQL query to execute."
|
||||
)
|
||||
args_schema: Type[BaseModel] = DatabricksQueryToolSchema
|
||||
|
||||
# Optional default parameters
|
||||
default_catalog: Optional[str] = None
|
||||
default_schema: Optional[str] = None
|
||||
default_warehouse_id: Optional[str] = None
|
||||
|
||||
_workspace_client: Optional["WorkspaceClient"] = None
|
||||
package_dependencies: List[str] = ["databricks-sdk"]
|
||||
|
||||
def __init__(
    self,
    default_catalog: Optional[str] = None,
    default_schema: Optional[str] = None,
    default_warehouse_id: Optional[str] = None,
    **kwargs: Any,
) -> None:
    """
    Create the tool and check that Databricks credentials are configured.

    Args:
        default_catalog (Optional[str]): Catalog used when a call gives none.
        default_schema (Optional[str]): Schema used when a call gives none.
        default_warehouse_id (Optional[str]): SQL warehouse ID used when a call gives none.
        **kwargs: Forwarded unchanged to the base class initializer.

    Raises:
        ValueError: Propagated from ``_validate_credentials`` when no
            credentials are found in the environment.
    """
    super().__init__(**kwargs)
    (
        self.default_catalog,
        self.default_schema,
        self.default_warehouse_id,
    ) = (default_catalog, default_schema, default_warehouse_id)
    self._validate_credentials()
|
||||
|
||||
def _validate_credentials(self) -> None:
|
||||
"""Validate that Databricks credentials are available."""
|
||||
has_profile = "DATABRICKS_CONFIG_PROFILE" in os.environ
|
||||
has_direct_auth = "DATABRICKS_HOST" in os.environ and "DATABRICKS_TOKEN" in os.environ
|
||||
|
||||
if not (has_profile or has_direct_auth):
|
||||
raise ValueError(
|
||||
"Databricks authentication credentials are required. "
|
||||
"Set either DATABRICKS_CONFIG_PROFILE or both DATABRICKS_HOST and DATABRICKS_TOKEN environment variables."
|
||||
)
|
||||
|
||||
@property
def workspace_client(self) -> "WorkspaceClient":
    """Return the cached Databricks WorkspaceClient, creating it on first access.

    Raises:
        ImportError: If ``databricks-sdk`` cannot be imported.
    """
    if self._workspace_client is not None:
        return self._workspace_client

    try:
        # Imported lazily so the module loads without the optional SDK.
        from databricks.sdk import WorkspaceClient

        self._workspace_client = WorkspaceClient()
    except ImportError:
        raise ImportError(
            "`databricks-sdk` package not found, please run `uv add databricks-sdk`"
        )
    return self._workspace_client
|
||||
|
||||
def _format_results(self, results: List[Dict[str, Any]]) -> str:
|
||||
"""Format query results as a readable string."""
|
||||
if not results:
|
||||
return "Query returned no results."
|
||||
|
||||
# Get column names from the first row
|
||||
if not results[0]:
|
||||
return "Query returned empty rows with no columns."
|
||||
|
||||
columns = list(results[0].keys())
|
||||
|
||||
# If we have rows but they're all empty, handle that case
|
||||
if not columns:
|
||||
return "Query returned rows but with no column data."
|
||||
|
||||
# Calculate column widths based on data
|
||||
col_widths = {col: len(col) for col in columns}
|
||||
for row in results:
|
||||
for col in columns:
|
||||
# Convert value to string and get its length
|
||||
# Handle None values gracefully
|
||||
value_str = str(row[col]) if row[col] is not None else "NULL"
|
||||
col_widths[col] = max(col_widths[col], len(value_str))
|
||||
|
||||
# Create header row
|
||||
header = " | ".join(f"{col:{col_widths[col]}}" for col in columns)
|
||||
separator = "-+-".join("-" * col_widths[col] for col in columns)
|
||||
|
||||
# Format data rows
|
||||
data_rows = []
|
||||
for row in results:
|
||||
# Handle None values by displaying "NULL"
|
||||
row_values = {col: str(row[col]) if row[col] is not None else "NULL" for col in columns}
|
||||
data_row = " | ".join(f"{row_values[col]:{col_widths[col]}}" for col in columns)
|
||||
data_rows.append(data_row)
|
||||
|
||||
# Add row count information
|
||||
result_info = f"({len(results)} row{'s' if len(results) != 1 else ''} returned)"
|
||||
|
||||
# Combine all parts
|
||||
return f"{header}\n{separator}\n" + "\n".join(data_rows) + f"\n\n{result_info}"
|
||||
|
||||
def _run(
|
||||
self,
|
||||
**kwargs: Any,
|
||||
) -> str:
|
||||
"""
|
||||
Execute a SQL query against Databricks and return the results.
|
||||
|
||||
Args:
|
||||
query (str): SQL query to execute
|
||||
catalog (Optional[str]): Databricks catalog name
|
||||
db_schema (Optional[str]): Databricks schema name
|
||||
warehouse_id (Optional[str]): SQL warehouse ID
|
||||
row_limit (Optional[int]): Maximum number of rows to return
|
||||
|
||||
Returns:
|
||||
str: Formatted query results
|
||||
"""
|
||||
try:
|
||||
# Get parameters with fallbacks to default values
|
||||
query = kwargs.get("query")
|
||||
catalog = kwargs.get("catalog") or self.default_catalog
|
||||
db_schema = kwargs.get("db_schema") or self.default_schema
|
||||
warehouse_id = kwargs.get("warehouse_id") or self.default_warehouse_id
|
||||
row_limit = kwargs.get("row_limit", 1000)
|
||||
|
||||
# Validate schema and query
|
||||
validated_input = DatabricksQueryToolSchema(
|
||||
query=query,
|
||||
catalog=catalog,
|
||||
db_schema=db_schema,
|
||||
warehouse_id=warehouse_id,
|
||||
row_limit=row_limit
|
||||
)
|
||||
|
||||
# Extract validated parameters
|
||||
query = validated_input.query
|
||||
catalog = validated_input.catalog
|
||||
db_schema = validated_input.db_schema
|
||||
warehouse_id = validated_input.warehouse_id
|
||||
|
||||
# Setup SQL context with catalog/schema if provided
|
||||
context = {}
|
||||
if catalog:
|
||||
context["catalog"] = catalog
|
||||
if db_schema:
|
||||
context["schema"] = db_schema
|
||||
|
||||
# Execute query
|
||||
statement = self.workspace_client.statement_execution
|
||||
|
||||
try:
|
||||
# Execute the statement
|
||||
execution = statement.execute_statement(
|
||||
warehouse_id=warehouse_id,
|
||||
statement=query,
|
||||
**context
|
||||
)
|
||||
|
||||
statement_id = execution.statement_id
|
||||
except Exception as execute_error:
|
||||
# Handle immediate execution errors
|
||||
return f"Error starting query execution: {str(execute_error)}"
|
||||
|
||||
# Poll for results with better error handling
|
||||
import time
|
||||
result = None
|
||||
timeout = 300 # 5 minutes timeout
|
||||
start_time = time.time()
|
||||
poll_count = 0
|
||||
previous_state = None # Track previous state to detect changes
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
poll_count += 1
|
||||
try:
|
||||
# Get statement status
|
||||
result = statement.get_statement(statement_id)
|
||||
|
||||
# Check if finished - be very explicit about state checking
|
||||
if hasattr(result, 'status') and hasattr(result.status, 'state'):
|
||||
state_value = str(result.status.state) # Convert to string to handle both string and enum
|
||||
|
||||
# Track state changes for debugging
|
||||
if previous_state != state_value:
|
||||
previous_state = state_value
|
||||
|
||||
# Check if state indicates completion
|
||||
if "SUCCEEDED" in state_value:
|
||||
break
|
||||
elif "FAILED" in state_value:
|
||||
# Extract error message with more robust handling
|
||||
error_info = "No detailed error info"
|
||||
try:
|
||||
# First try direct access to error.message
|
||||
if hasattr(result.status, 'error') and result.status.error:
|
||||
if hasattr(result.status.error, 'message'):
|
||||
error_info = result.status.error.message
|
||||
# Some APIs may have a different structure
|
||||
elif hasattr(result.status.error, 'error_message'):
|
||||
error_info = result.status.error.error_message
|
||||
# Last resort, try to convert the whole error object to string
|
||||
else:
|
||||
error_info = str(result.status.error)
|
||||
except Exception as err_extract_error:
|
||||
# If all else fails, try to get any info we can
|
||||
error_info = f"Error details unavailable: {str(err_extract_error)}"
|
||||
|
||||
# Return immediately on first FAILED state detection
|
||||
return f"Query execution failed: {error_info}"
|
||||
elif "CANCELED" in state_value:
|
||||
return "Query was canceled"
|
||||
|
||||
except Exception as poll_error:
|
||||
# Don't immediately fail - try again a few times
|
||||
if poll_count > 3:
|
||||
return f"Error checking query status: {str(poll_error)}"
|
||||
|
||||
# Wait before polling again
|
||||
time.sleep(2)
|
||||
|
||||
# Check if we timed out
|
||||
if result is None:
|
||||
return "Query returned no result (likely timed out or failed)"
|
||||
|
||||
if not hasattr(result, 'status') or not hasattr(result.status, 'state'):
|
||||
return "Query completed but returned an invalid result structure"
|
||||
|
||||
# Convert state to string for comparison
|
||||
state_value = str(result.status.state)
|
||||
if not any(state in state_value for state in ["SUCCEEDED", "FAILED", "CANCELED"]):
|
||||
return f"Query timed out after 5 minutes (last state: {state_value})"
|
||||
|
||||
# Get results - adapt this based on the actual structure of the result object
|
||||
chunk_results = []
|
||||
|
||||
# Check if we have results and a schema in a very defensive way
|
||||
has_schema = (hasattr(result, 'manifest') and result.manifest is not None and
|
||||
hasattr(result.manifest, 'schema') and result.manifest.schema is not None)
|
||||
has_result = (hasattr(result, 'result') and result.result is not None)
|
||||
|
||||
if has_schema and has_result:
|
||||
try:
|
||||
# Get schema for column names
|
||||
columns = [col.name for col in result.manifest.schema.columns]
|
||||
|
||||
# Debug info for schema
|
||||
|
||||
# Keep track of all dynamic columns we create
|
||||
all_columns = set(columns)
|
||||
|
||||
# Dump the raw structure of result data to help troubleshoot
|
||||
if hasattr(result.result, 'data_array'):
|
||||
# Add defensive check for None data_array
|
||||
if result.result.data_array is None:
|
||||
print("data_array is None - likely an empty result set or DDL query")
|
||||
# Return empty result handling rather than trying to process null data
|
||||
return "Query executed successfully (no data returned)"
|
||||
|
||||
# IMPROVED DETECTION LOGIC: Check if we're possibly dealing with rows where each item
|
||||
# contains a single value or character (which could indicate incorrect row structure)
|
||||
is_likely_incorrect_row_structure = False
|
||||
|
||||
# Only try to analyze sample if data_array exists and has content
|
||||
if hasattr(result.result, 'data_array') and result.result.data_array and len(result.result.data_array) > 0 and len(result.result.data_array[0]) > 0:
|
||||
sample_size = min(20, len(result.result.data_array[0]))
|
||||
|
||||
if sample_size > 0:
|
||||
single_char_count = 0
|
||||
single_digit_count = 0
|
||||
total_items = 0
|
||||
|
||||
for i in range(sample_size):
|
||||
val = result.result.data_array[0][i]
|
||||
total_items += 1
|
||||
if isinstance(val, str) and len(val) == 1 and not val.isdigit():
|
||||
single_char_count += 1
|
||||
elif isinstance(val, str) and len(val) == 1 and val.isdigit():
|
||||
single_digit_count += 1
|
||||
|
||||
# If a significant portion of the first values are single characters or digits,
|
||||
# this likely indicates data is being incorrectly structured
|
||||
if total_items > 0 and (single_char_count + single_digit_count) / total_items > 0.5:
|
||||
is_likely_incorrect_row_structure = True
|
||||
|
||||
# Additional check: if many rows have just 1 item when we expect multiple columns
|
||||
rows_with_single_item = 0
|
||||
if hasattr(result.result, 'data_array') and result.result.data_array and len(result.result.data_array) > 0:
|
||||
sample_size_for_rows = min(sample_size, len(result.result.data_array[0])) if 'sample_size' in locals() else min(20, len(result.result.data_array[0]))
|
||||
rows_with_single_item = sum(1 for row in result.result.data_array[0][:sample_size_for_rows] if isinstance(row, list) and len(row) == 1)
|
||||
if rows_with_single_item > sample_size_for_rows * 0.5 and len(columns) > 1:
|
||||
is_likely_incorrect_row_structure = True
|
||||
|
||||
# Check if we're getting primarily single characters or the data structure seems off,
|
||||
# we should use special handling
|
||||
if 'is_likely_incorrect_row_structure' in locals() and is_likely_incorrect_row_structure:
|
||||
print("Data appears to be malformed - will use special row reconstruction")
|
||||
needs_special_string_handling = True
|
||||
else:
|
||||
needs_special_string_handling = False
|
||||
|
||||
# Process results differently based on detection
|
||||
if 'needs_special_string_handling' in locals() and needs_special_string_handling:
|
||||
# We're dealing with data where the rows may be incorrectly structured
|
||||
print("Using row reconstruction processing mode")
|
||||
|
||||
# Collect all values into a flat list
|
||||
all_values = []
|
||||
if hasattr(result.result, 'data_array') and result.result.data_array:
|
||||
# Flatten all values into a single list
|
||||
for chunk in result.result.data_array:
|
||||
for item in chunk:
|
||||
if isinstance(item, (list, tuple)):
|
||||
all_values.extend(item)
|
||||
else:
|
||||
all_values.append(item)
|
||||
|
||||
# Get the expected column count from schema
|
||||
expected_column_count = len(columns)
|
||||
|
||||
# Try to reconstruct rows using pattern recognition
|
||||
reconstructed_rows = []
|
||||
|
||||
# PATTERN RECOGNITION APPROACH
|
||||
# Look for likely indicators of row boundaries in the data
|
||||
# For Netflix data, we expect IDs as numbers, titles as text strings, etc.
|
||||
|
||||
# Use regex pattern to identify ID columns that likely start a new row
|
||||
import re
|
||||
id_pattern = re.compile(r'^\d{5,9}$') # Netflix IDs are often 5-9 digits
|
||||
id_indices = []
|
||||
|
||||
for i, val in enumerate(all_values):
|
||||
if isinstance(val, str) and id_pattern.match(val):
|
||||
# This value looks like an ID, might be the start of a row
|
||||
if i < len(all_values) - 1:
|
||||
next_few_values = all_values[i+1:i+5]
|
||||
# If following values look like they could be part of a title
|
||||
if any(isinstance(v, str) and len(v) > 1 for v in next_few_values):
|
||||
id_indices.append(i)
|
||||
|
||||
if id_indices:
|
||||
|
||||
# If we found potential row starts, use them to extract rows
|
||||
for i in range(len(id_indices)):
|
||||
start_idx = id_indices[i]
|
||||
end_idx = id_indices[i+1] if i+1 < len(id_indices) else len(all_values)
|
||||
|
||||
# Extract values for this row
|
||||
row_values = all_values[start_idx:end_idx]
|
||||
|
||||
# Special handling for Netflix title data
|
||||
# Titles might be split into individual characters
|
||||
if 'Title' in columns and len(row_values) > expected_column_count:
|
||||
|
||||
# Try to reconstruct by looking for patterns
|
||||
# We know ID is first, then Title (which may be split)
|
||||
# Then other fields like Genre, etc.
|
||||
|
||||
# Take first value as ID
|
||||
row_dict = {columns[0]: row_values[0]}
|
||||
|
||||
# Look for Genre or other non-title fields to determine where title ends
|
||||
title_end_idx = 1
|
||||
for j in range(2, min(100, len(row_values))):
|
||||
val = row_values[j]
|
||||
# Check for common genres or non-title markers
|
||||
if isinstance(val, str) and val in ['Comedy', 'Drama', 'Action', 'Horror', 'Thriller', 'Documentary']:
|
||||
# Likely found the Genre field
|
||||
title_end_idx = j
|
||||
break
|
||||
|
||||
# Reconstruct title from individual characters
|
||||
if title_end_idx > 1:
|
||||
title_chars = row_values[1:title_end_idx]
|
||||
# Check if they're individual characters
|
||||
if all(isinstance(c, str) and len(c) == 1 for c in title_chars):
|
||||
title = ''.join(title_chars)
|
||||
row_dict['Title'] = title
|
||||
|
||||
# Assign remaining values to columns
|
||||
remaining_values = row_values[title_end_idx:]
|
||||
for j, col_name in enumerate(columns[2:], 2):
|
||||
if j-2 < len(remaining_values):
|
||||
row_dict[col_name] = remaining_values[j-2]
|
||||
else:
|
||||
row_dict[col_name] = None
|
||||
else:
|
||||
# Fallback: simple mapping
|
||||
for j, col_name in enumerate(columns):
|
||||
if j < len(row_values):
|
||||
row_dict[col_name] = row_values[j]
|
||||
else:
|
||||
row_dict[col_name] = None
|
||||
else:
|
||||
# Standard mapping
|
||||
row_dict = {}
|
||||
for j, col_name in enumerate(columns):
|
||||
if j < len(row_values):
|
||||
row_dict[col_name] = row_values[j]
|
||||
else:
|
||||
row_dict[col_name] = None
|
||||
|
||||
reconstructed_rows.append(row_dict)
|
||||
else:
|
||||
# More intelligent chunking - try to detect where columns like Title might be split
|
||||
title_idx = columns.index('Title') if 'Title' in columns else -1
|
||||
|
||||
if title_idx >= 0:
|
||||
print("Attempting title reconstruction method")
|
||||
# Try to detect if title is split across multiple values
|
||||
i = 0
|
||||
while i < len(all_values):
|
||||
# Check if this could be an ID (start of a row)
|
||||
if isinstance(all_values[i], str) and id_pattern.match(all_values[i]):
|
||||
row_dict = {columns[0]: all_values[i]}
|
||||
i += 1
|
||||
|
||||
# Try to reconstruct title if it appears to be split
|
||||
title_chars = []
|
||||
while (i < len(all_values) and
|
||||
isinstance(all_values[i], str) and
|
||||
len(all_values[i]) <= 1 and
|
||||
len(title_chars) < 100): # Cap title length
|
||||
title_chars.append(all_values[i])
|
||||
i += 1
|
||||
|
||||
if title_chars:
|
||||
row_dict[columns[title_idx]] = ''.join(title_chars)
|
||||
|
||||
# Add remaining fields
|
||||
for j in range(title_idx + 1, len(columns)):
|
||||
if i < len(all_values):
|
||||
row_dict[columns[j]] = all_values[i]
|
||||
i += 1
|
||||
else:
|
||||
row_dict[columns[j]] = None
|
||||
|
||||
reconstructed_rows.append(row_dict)
|
||||
else:
|
||||
i += 1
|
||||
|
||||
# If we still don't have rows, use simple chunking as fallback
|
||||
if not reconstructed_rows:
|
||||
print("Falling back to basic chunking approach")
|
||||
chunks = [all_values[i:i+expected_column_count] for i in range(0, len(all_values), expected_column_count)]
|
||||
|
||||
for chunk in chunks:
|
||||
# Skip chunks that seem to be partial/incomplete rows
|
||||
if len(chunk) < expected_column_count * 0.75: # Allow for some missing values
|
||||
continue
|
||||
|
||||
row_dict = {}
|
||||
|
||||
# Map values to column names
|
||||
for i, col in enumerate(columns):
|
||||
if i < len(chunk):
|
||||
row_dict[col] = chunk[i]
|
||||
else:
|
||||
row_dict[col] = None
|
||||
|
||||
reconstructed_rows.append(row_dict)
|
||||
|
||||
# Apply post-processing to fix known issues
|
||||
if reconstructed_rows and 'Title' in columns:
|
||||
print("Applying post-processing to improve data quality")
|
||||
for row in reconstructed_rows:
|
||||
# Fix titles that might still have issues
|
||||
if isinstance(row.get('Title'), str) and len(row.get('Title')) <= 1:
|
||||
# This is likely still a fragmented title - mark as potentially incomplete
|
||||
row['Title'] = f"[INCOMPLETE] {row.get('Title')}"
|
||||
|
||||
# Ensure we respect the row limit
|
||||
if row_limit and len(reconstructed_rows) > row_limit:
|
||||
reconstructed_rows = reconstructed_rows[:row_limit]
|
||||
|
||||
chunk_results = reconstructed_rows
|
||||
else:
|
||||
# Process normal result structure as before
|
||||
print("Using standard processing mode")
|
||||
|
||||
# Check different result structures
|
||||
if hasattr(result.result, 'data_array') and result.result.data_array:
|
||||
# Check if data appears to be malformed within chunks
|
||||
for chunk_idx, chunk in enumerate(result.result.data_array):
|
||||
|
||||
# Check if chunk might actually contain individual columns of a single row
|
||||
# This is another way data might be malformed - check the first few values
|
||||
if len(chunk) > 0 and len(columns) > 1:
|
||||
# If there seems to be a mismatch between chunk structure and expected columns
|
||||
first_few_values = chunk[:min(5, len(chunk))]
|
||||
if all(isinstance(val, (str, int, float)) and not isinstance(val, (list, dict)) for val in first_few_values):
|
||||
if len(chunk) > len(columns) * 3: # Heuristic: if chunk has way more items than columns
|
||||
print("Chunk appears to contain individual values rather than rows - switching to row reconstruction")
|
||||
|
||||
# This chunk might actually be values of multiple rows - try to reconstruct
|
||||
values = chunk # All values in this chunk
|
||||
reconstructed_rows = []
|
||||
|
||||
# Try to create rows based on expected column count
|
||||
for i in range(0, len(values), len(columns)):
|
||||
if i + len(columns) <= len(values): # Ensure we have enough values
|
||||
row_values = values[i:i+len(columns)]
|
||||
row_dict = {col: val for col, val in zip(columns, row_values)}
|
||||
reconstructed_rows.append(row_dict)
|
||||
|
||||
if reconstructed_rows:
|
||||
chunk_results.extend(reconstructed_rows)
|
||||
continue # Skip normal processing for this chunk
|
||||
|
||||
# Special case: when chunk contains exactly the right number of values for a single row
|
||||
# This handles the case where instead of a list of rows, we just got all values in a flat list
|
||||
if all(isinstance(val, (str, int, float)) and not isinstance(val, (list, dict)) for val in chunk):
|
||||
if len(chunk) == len(columns) or (len(chunk) > 0 and len(chunk) % len(columns) == 0):
|
||||
|
||||
# Process flat list of values as rows
|
||||
for i in range(0, len(chunk), len(columns)):
|
||||
row_values = chunk[i:i+len(columns)]
|
||||
if len(row_values) == len(columns): # Only process complete rows
|
||||
row_dict = {col: val for col, val in zip(columns, row_values)}
|
||||
chunk_results.append(row_dict)
|
||||
|
||||
# Skip regular row processing for this chunk
|
||||
continue
|
||||
|
||||
# Normal processing for typical row structure
|
||||
for row_idx, row in enumerate(chunk):
|
||||
# Ensure row is actually a collection of values
|
||||
if not isinstance(row, (list, tuple, dict)):
|
||||
# This might be a single value; skip it or handle specially
|
||||
continue
|
||||
|
||||
# Convert each row to a dictionary with column names as keys
|
||||
row_dict = {}
|
||||
|
||||
# Handle dict rows directly
|
||||
if isinstance(row, dict):
|
||||
# Use the existing column mapping
|
||||
row_dict = dict(row)
|
||||
elif isinstance(row, (list, tuple)):
|
||||
# Map list of values to columns
|
||||
for i, val in enumerate(row):
|
||||
if i < len(columns): # Only process if we have a matching column
|
||||
row_dict[columns[i]] = val
|
||||
else:
|
||||
# Extra values without column names
|
||||
dynamic_col = f"Column_{i}"
|
||||
row_dict[dynamic_col] = val
|
||||
all_columns.add(dynamic_col)
|
||||
|
||||
# If we have fewer values than columns, set missing values to None
|
||||
for col in columns:
|
||||
if col not in row_dict:
|
||||
row_dict[col] = None
|
||||
|
||||
chunk_results.append(row_dict)
|
||||
|
||||
elif hasattr(result.result, 'data') and result.result.data:
|
||||
# Alternative data structure
|
||||
|
||||
for row_idx, row in enumerate(result.result.data):
|
||||
# Debug info
|
||||
|
||||
# Safely create dictionary matching column names to values
|
||||
row_dict = {}
|
||||
for i, val in enumerate(row):
|
||||
if i < len(columns): # Only process if we have a matching column
|
||||
row_dict[columns[i]] = val
|
||||
else:
|
||||
# Extra values without column names
|
||||
dynamic_col = f"Column_{i}"
|
||||
row_dict[dynamic_col] = val
|
||||
all_columns.add(dynamic_col)
|
||||
|
||||
# If we have fewer values than columns, set missing values to None
|
||||
for i, col in enumerate(columns):
|
||||
if i >= len(row):
|
||||
row_dict[col] = None
|
||||
|
||||
chunk_results.append(row_dict)
|
||||
|
||||
# After processing all rows, ensure all rows have all columns
|
||||
normalized_results = []
|
||||
for row in chunk_results:
|
||||
# Create a new row with all columns, defaulting to None for missing ones
|
||||
normalized_row = {col: row.get(col, None) for col in all_columns}
|
||||
normalized_results.append(normalized_row)
|
||||
|
||||
# Replace the original results with normalized ones
|
||||
chunk_results = normalized_results
|
||||
|
||||
except Exception as results_error:
|
||||
# Enhanced error message with more context
|
||||
import traceback
|
||||
error_details = traceback.format_exc()
|
||||
return f"Error processing query results: {str(results_error)}\n\nDetails:\n{error_details}"
|
||||
|
||||
# If we have no results but the query succeeded (e.g., for DDL statements)
|
||||
if not chunk_results and hasattr(result, 'status'):
|
||||
state_value = str(result.status.state)
|
||||
if "SUCCEEDED" in state_value:
|
||||
return "Query executed successfully (no results to display)"
|
||||
|
||||
# Format and return results
|
||||
return self._format_results(chunk_results)
|
||||
|
||||
except Exception as e:
|
||||
# Include more details in the error message to help with debugging
|
||||
import traceback
|
||||
error_details = traceback.format_exc()
|
||||
return f"Error executing Databricks query: {str(e)}\n\nDetails:\n{error_details}"
|
||||
40
crewai_tools/tools/directory_read_tool/README.md
Normal file
40
crewai_tools/tools/directory_read_tool/README.md
Normal file
@@ -0,0 +1,40 @@
|
||||
```markdown
|
||||
# DirectoryReadTool
|
||||
|
||||
## Description
|
||||
The DirectoryReadTool is a highly efficient utility designed for the comprehensive listing of directory contents. It recursively navigates through the specified directory, providing users with a detailed enumeration of all files, including those nested within subdirectories. This tool is indispensable for tasks requiring a thorough inventory of directory structures or for validating the organization of files within directories.
|
||||
|
||||
## Installation
|
||||
Install the `crewai_tools` package to use the DirectoryReadTool in your project. If you haven't added this package to your environment, you can easily install it with pip using the following command:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
This installs the latest version of the `crewai_tools` package, allowing access to the DirectoryReadTool and other utilities.
|
||||
|
||||
## Example
|
||||
The DirectoryReadTool is simple to use. The code snippet below shows how to set up and use the tool to list the contents of a specified directory:
|
||||
|
||||
```python
|
||||
from crewai_tools import DirectoryReadTool
|
||||
|
||||
# Initialize the tool with the directory you want to explore
|
||||
tool = DirectoryReadTool(directory='/path/to/your/directory')
|
||||
|
||||
# Use the tool to list the contents of the specified directory
|
||||
directory_contents = tool.run()
|
||||
print(directory_contents)
|
||||
```
|
||||
|
||||
This example demonstrates the essential steps to utilize the DirectoryReadTool effectively, highlighting its simplicity and user-friendly design.
|
||||
|
||||
## Arguments
|
||||
The DirectoryReadTool requires minimal configuration for use. The essential argument for this tool is as follows:
|
||||
|
||||
- `directory`: A mandatory argument that specifies the path to the directory whose contents you wish to list. It accepts both absolute and relative paths, guiding the tool to the desired directory for content listing.
|
||||
|
||||
The DirectoryReadTool provides a user-friendly and efficient way to list directory contents, making it an invaluable tool for managing and inspecting directory structures.
|
||||
```
|
||||
|
||||
This revised documentation for the DirectoryReadTool maintains the structure and content requirements as outlined, with adjustments made for clarity, consistency, and adherence to the high-quality standards exemplified in the provided documentation example.
|
||||
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Argument schema that DirectoryReadTool switches to when it is constructed
# with a directory: it declares no fields, so no call-time arguments are
# described to the caller.
class FixedDirectoryReadToolSchema(BaseModel):
    """Input for DirectoryReadTool."""
|
||||
|
||||
|
||||
# Default argument schema of DirectoryReadTool (used when no directory was
# bound at construction): extends the field-less fixed schema with the
# directory to list.
class DirectoryReadToolSchema(FixedDirectoryReadToolSchema):
    """Input for DirectoryReadTool."""

    # Required field (Ellipsis default): the caller has to name the directory.
    directory: str = Field(
        default=...,
        description="Mandatory directory to list content",
    )
|
||||
|
||||
|
||||
class DirectoryReadTool(BaseTool):
    """List every file found under a directory, walking it recursively.

    The directory can be bound when the tool is constructed, or supplied on
    each call through the ``directory`` argument.
    """

    name: str = "List files in directory"
    description: str = (
        "A tool that can be used to recursively list a directory's content."
    )
    args_schema: Type[BaseModel] = DirectoryReadToolSchema
    # Directory bound at construction time; None means it must come from the call.
    directory: Optional[str] = None

    def __init__(self, directory: Optional[str] = None, **kwargs):
        """Create the tool, optionally binding it to ``directory``.

        Args:
            directory: Directory to list on every call. When given, the
                description is rewritten to mention it, the argument schema
                is switched to ``FixedDirectoryReadToolSchema`` and the
                generated description is refreshed.
            **kwargs: Forwarded unchanged to ``BaseTool``.
        """
        super().__init__(**kwargs)
        if directory is not None:
            self.directory = directory
            self.description = f"A tool that can be used to list {directory}'s content."
            self.args_schema = FixedDirectoryReadToolSchema
            self._generate_description()

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Return a bullet list of all file paths below the directory.

        Args:
            **kwargs: May contain ``directory``; when it is missing or empty
                the directory bound at construction is used instead.

        Returns:
            A string starting with ``"File paths: "`` followed by one entry
            per file, each rendered as ``<directory>/<relative path>``.

        Raises:
            ValueError: If no directory is available from either source.
        """
        directory = kwargs.get("directory") or self.directory
        if not directory:
            raise ValueError(
                "No directory provided: pass 'directory' when calling the tool "
                "or when constructing it."
            )

        # Trailing slashes are dropped for display only; a bare "/" must stay
        # walkable, so fall back to the original value when stripping empties it.
        display_root = directory.rstrip("/")
        walk_root = display_root or directory

        # relpath removes only the leading root. A plain str.replace would also
        # delete later occurrences of the directory string inside the path
        # (e.g. every "." of a file name when the directory is ".").
        files_list = [
            f"{display_root}/{os.path.relpath(os.path.join(root, filename), walk_root)}"
            for root, _dirs, files in os.walk(walk_root)
            for filename in files
        ]
        files = "\n- ".join(files_list)
        return f"File paths: \n-{files}"
|
||||
55
crewai_tools/tools/directory_search_tool/README.md
Normal file
55
crewai_tools/tools/directory_search_tool/README.md
Normal file
@@ -0,0 +1,55 @@
|
||||
# DirectorySearchTool
|
||||
|
||||
## Description
|
||||
This tool is designed to perform a semantic search for queries within the content of a specified directory. Utilizing the RAG (Retrieval-Augmented Generation) methodology, it offers a powerful means to semantically navigate through the files of a given directory. The tool can be dynamically set to search any directory specified at runtime or can be pre-configured to search within a specific directory upon initialization.
|
||||
|
||||
## Installation
|
||||
To start using the DirectorySearchTool, you need to install the crewai_tools package. Execute the following command in your terminal:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
The following examples demonstrate how to initialize the DirectorySearchTool for different use cases and how to perform a search:
|
||||
|
||||
```python
|
||||
from crewai_tools import DirectorySearchTool
|
||||
|
||||
# To enable searching within any specified directory at runtime
|
||||
tool = DirectorySearchTool()
|
||||
|
||||
# Alternatively, to restrict searches to a specific directory
|
||||
tool = DirectorySearchTool(directory='/path/to/directory')
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `directory` : This string argument specifies the directory within which to search. It is mandatory if the tool has not been initialized with a directory; otherwise, the tool will only search within the initialized directory.
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = DirectorySearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
@@ -0,0 +1,59 @@
|
||||
from typing import Optional, Type
|
||||
|
||||
try:
|
||||
from embedchain.loaders.directory_loader import DirectoryLoader
|
||||
EMBEDCHAIN_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDCHAIN_AVAILABLE = False
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
# Argument schema that DirectorySearchTool switches to once it has been bound
# to a directory at construction: only the query is requested.
class FixedDirectorySearchToolSchema(BaseModel):
    """Input for DirectorySearchTool."""

    # Required field (Ellipsis default).
    search_query: str = Field(
        default=...,
        description="Mandatory search query you want to use to search the directory's content",
    )
|
||||
|
||||
|
||||
# Default argument schema of DirectorySearchTool: the inherited search query
# plus the directory to search.
class DirectorySearchToolSchema(FixedDirectorySearchToolSchema):
    """Input for DirectorySearchTool."""

    # Required field (Ellipsis default).
    directory: str = Field(
        default=...,
        description="Mandatory directory you want to search",
    )
|
||||
|
||||
|
||||
class DirectorySearchTool(RagTool):
    """Search a directory's content with a query, built on ``RagTool``.

    The directory can be bound at construction or passed on each call.
    Construction fails with ``ImportError`` when embedchain is unavailable.
    """

    name: str = "Search a directory's content"
    description: str = (
        "A tool that can be used to semantic search a query from a directory's content."
    )
    args_schema: Type[BaseModel] = DirectorySearchToolSchema

    def __init__(self, directory: Optional[str] = None, **kwargs):
        """Create the tool, optionally binding it to ``directory``.

        Args:
            directory: Directory to add right away. When given, the
                description is rewritten to mention it and the argument
                schema is switched to ``FixedDirectorySearchToolSchema``.
            **kwargs: Forwarded unchanged to ``RagTool``.

        Raises:
            ImportError: If embedchain could not be imported.
        """
        if not EMBEDCHAIN_AVAILABLE:
            raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")
        super().__init__(**kwargs)

        # Nothing else to configure when the directory is supplied per call.
        if directory is None:
            return

        self.add(directory)
        self.description = f"A tool that can be used to semantic search a query the {directory} directory's content."
        self.args_schema = FixedDirectorySearchToolSchema
        self._generate_description()

    def add(self, directory: str) -> None:
        """Add ``directory`` through the parent ``add`` using a recursive DirectoryLoader."""
        recursive_loader = DirectoryLoader(config={"recursive": True})
        super().add(directory, loader=recursive_loader)

    def _run(
        self,
        search_query: str,
        directory: Optional[str] = None,
    ) -> str:
        """Add ``directory`` when one is given, then delegate the query to the parent ``_run``."""
        if directory is not None:
            self.add(directory)
        answer = super()._run(query=search_query)
        return answer
|
||||
57
crewai_tools/tools/docx_search_tool/README.md
Normal file
57
crewai_tools/tools/docx_search_tool/README.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# DOCXSearchTool
|
||||
|
||||
## Description
|
||||
The DOCXSearchTool is a RAG tool designed for semantic searching within DOCX documents. It enables users to effectively search and extract relevant information from DOCX files using query-based searches. This tool is invaluable for data analysis, information management, and research tasks, streamlining the process of finding specific information within large document collections.
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package by running the following command in your terminal:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
The following example demonstrates initializing the DOCXSearchTool to search within any DOCX file's content or with a specific DOCX file path.
|
||||
|
||||
```python
|
||||
from crewai_tools import DOCXSearchTool
|
||||
|
||||
# Initialize the tool to search within any DOCX file's content
|
||||
tool = DOCXSearchTool()
|
||||
|
||||
# OR
|
||||
|
||||
# Initialize the tool with a specific DOCX file, so the agent can only search the content of the specified DOCX file
|
||||
tool = DOCXSearchTool(docx='path/to/your/document.docx')
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `docx`: An optional file path to a specific DOCX document you wish to search. If not provided during initialization, the tool allows for later specification of any DOCX file's content path for searching.
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = DOCXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
62
crewai_tools/tools/docx_search_tool/docx_search_tool.py
Normal file
62
crewai_tools/tools/docx_search_tool/docx_search_tool.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
try:
|
||||
from embedchain.models.data_type import DataType
|
||||
EMBEDCHAIN_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDCHAIN_AVAILABLE = False
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
# Argument schema that DOCXSearchTool switches to once it has been bound to a
# DOCX file at construction.
class FixedDOCXSearchToolSchema(BaseModel):
    """Input for DOCXSearchTool."""

    # Optional with a None default: the document was already supplied when the
    # tool was built, so callers must not be forced to pass it again.
    docx: Optional[str] = Field(
        None, description="File path or URL of a DOCX file to be searched"
    )
    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the DOCX's content",
    )


# Default argument schema of DOCXSearchTool (no document bound at construction).
class DOCXSearchToolSchema(FixedDOCXSearchToolSchema):
    """Input for DOCXSearchTool."""

    # Re-declared as required so this schema keeps asking for the document,
    # exactly as it did when the requirement was inherited from the parent.
    docx: Optional[str] = Field(
        ..., description="File path or URL of a DOCX file to be searched"
    )
    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the DOCX's content",
    )
|
||||
|
||||
|
||||
class DOCXSearchTool(RagTool):
    """Search a DOCX document's content with a query, built on ``RagTool``.

    The document can be bound at construction or passed on each call.
    """

    name: str = "Search a DOCX's content"
    description: str = (
        "A tool that can be used to semantic search a query from a DOCX's content."
    )
    args_schema: Type[BaseModel] = DOCXSearchToolSchema

    def __init__(self, docx: Optional[str] = None, **kwargs):
        """Create the tool, optionally binding it to ``docx``.

        Args:
            docx: Document to add right away. When given, the description is
                rewritten to mention it and the argument schema is switched
                to ``FixedDOCXSearchToolSchema``.
            **kwargs: Forwarded unchanged to ``RagTool``.
        """
        super().__init__(**kwargs)

        # Nothing else to configure when the document is supplied per call.
        if docx is None:
            return

        self.add(docx)
        self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content."
        self.args_schema = FixedDOCXSearchToolSchema
        self._generate_description()

    def add(self, docx: str) -> None:
        """Add ``docx`` through the parent ``add`` as ``DataType.DOCX``.

        Raises:
            ImportError: If embedchain could not be imported.
        """
        if not EMBEDCHAIN_AVAILABLE:
            raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")
        docx_type = DataType.DOCX
        super().add(docx, data_type=docx_type)

    def _run(
        self,
        search_query: str,
        docx: Optional[str] = None,
    ) -> Any:
        """Add ``docx`` when one is given, then delegate the query to the parent ``_run``."""
        if docx is not None:
            self.add(docx)
        answer = super()._run(query=search_query)
        return answer
|
||||
30
crewai_tools/tools/exa_tools/README.md
Normal file
30
crewai_tools/tools/exa_tools/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# EXASearchTool Documentation
|
||||
|
||||
## Description
|
||||
This tool performs a semantic search for a specified query across the internet. It uses the `https://exa.ai/` API to fetch and display the most relevant search results for the query provided by the user.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
```shell
|
||||
uv add 'crewai[tools]' exa_py
|
||||
```
|
||||
|
||||
## Example
|
||||
The following example demonstrates how to initialize the tool and execute a search with a given query:
|
||||
|
||||
```python
|
||||
from crewai_tools import EXASearchTool
|
||||
|
||||
# Initialize the tool for internet searching capabilities
|
||||
tool = EXASearchTool(api_key="your_api_key")
|
||||
```
|
||||
|
||||
## Steps to Get Started
|
||||
To effectively use the `EXASearchTool`, follow these steps:
|
||||
|
||||
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
|
||||
2. **API Key Acquisition**: Acquire a `https://exa.ai/` API key by registering for a free account at `https://exa.ai/`.
|
||||
3. **Environment Configuration**: Store your obtained API key in an environment variable named `EXA_API_KEY` to facilitate its use by the tool.
|
||||
|
||||
## Conclusion
|
||||
By integrating the `EXASearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.
|
||||
108
crewai_tools/tools/exa_tools/exa_search_tool.py
Normal file
108
crewai_tools/tools/exa_tools/exa_search_tool.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import os
|
||||
from typing import Any, List, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
try:
|
||||
from exa_py import Exa
|
||||
|
||||
EXA_INSTALLED = True
|
||||
except ImportError:
|
||||
Exa = Any
|
||||
EXA_INSTALLED = False
|
||||
|
||||
|
||||
# Argument schema of EXASearchTool: one required query plus optional filters
# that default to None. No class docstring is added on purpose, so the
# generated model schema stays as it was.
class EXABaseToolSchema(BaseModel):
    # Required field (Ellipsis default).
    search_query: str = Field(
        default=...,
        description="Mandatory search query you want to use to search the internet",
    )
    start_published_date: Optional[str] = Field(
        default=None,
        description="Start date for the search",
    )
    end_published_date: Optional[str] = Field(
        default=None,
        description="End date for the search",
    )
    include_domains: Optional[list[str]] = Field(
        default=None,
        description="List of domains to include in the search",
    )
|
||||
|
||||
|
||||
class EXASearchTool(BaseTool):
    """Search the internet through an ``exa_py`` client.

    The API key is read from the ``api_key`` argument or, by default, from the
    ``EXA_API_KEY`` environment variable.
    """

    model_config = {"arbitrary_types_allowed": True}
    name: str = "EXASearchTool"
    description: str = "Search the internet using Exa"
    args_schema: Type[BaseModel] = EXABaseToolSchema
    # Created in __init__ from the api_key.
    client: Optional["Exa"] = None
    # When true, _run calls search_and_contents instead of search.
    content: Optional[bool] = False
    # Forwarded as ``summary=`` to search_and_contents.
    summary: Optional[bool] = False
    # Forwarded as the ``type`` search parameter.
    type: Optional[str] = "auto"
    package_dependencies: List[str] = ["exa_py"]
    api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("EXA_API_KEY"),
        description="API key for Exa services",
        json_schema_extra={"required": False},
    )
    env_vars: List[EnvVar] = [
        EnvVar(
            name="EXA_API_KEY", description="API key for Exa services", required=False
        ),
    ]

    def __init__(
        self,
        content: Optional[bool] = False,
        summary: Optional[bool] = False,
        type: Optional[str] = "auto",
        **kwargs,
    ):
        """Create the tool and its Exa client.

        If ``exa_py`` could not be imported at module load, the user is asked
        whether to install it with ``uv add exa_py``.

        Args:
            content: Whether ``_run`` should also fetch result contents.
            summary: Passed as ``summary`` when contents are fetched.
            type: Search type forwarded to the client.
            **kwargs: Forwarded unchanged to ``BaseTool`` (e.g. ``api_key``).

        Raises:
            ImportError: If ``exa_py`` is missing and the user declines the
                installation.
        """
        super().__init__(
            **kwargs,
        )
        exa_cls = Exa
        if not EXA_INSTALLED:
            import click

            if not click.confirm(
                "You are missing the 'exa_py' package. Would you like to install it?"
            ):
                raise ImportError(
                    "`exa_py` package not found, please run `uv add exa_py`"
                )

            import importlib
            import subprocess

            subprocess.run(["uv", "add", "exa_py"], check=True)

            # The module-level ``Exa`` is still the ``Any`` placeholder set when
            # the import failed, so load the real class from the package that
            # was just installed.
            importlib.invalidate_caches()
            exa_cls = importlib.import_module("exa_py").Exa

        self.client = exa_cls(api_key=self.api_key)
        self.content = content
        self.summary = summary
        self.type = type

    def _run(
        self,
        search_query: str,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        include_domains: Optional[list[str]] = None,
    ) -> Any:
        """Run a search and return the client's result object unchanged.

        Args:
            search_query: Query passed to the client.
            start_published_date: Forwarded only when truthy.
            end_published_date: Forwarded only when truthy.
            include_domains: Forwarded only when truthy.

        Raises:
            ValueError: If the client has not been initialized.
        """
        if self.client is None:
            raise ValueError("Client not initialized")

        search_params = {
            "type": self.type,
        }

        # Only forward the filters the caller actually set.
        if start_published_date:
            search_params["start_published_date"] = start_published_date
        if end_published_date:
            search_params["end_published_date"] = end_published_date
        if include_domains:
            search_params["include_domains"] = include_domains

        if self.content:
            results = self.client.search_and_contents(
                search_query, summary=self.summary, **search_params
            )
        else:
            results = self.client.search(search_query, **search_params)
        return results
|
||||
40
crewai_tools/tools/file_read_tool/README.md
Normal file
40
crewai_tools/tools/file_read_tool/README.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# FileReadTool
|
||||
|
||||
## Description
|
||||
|
||||
The FileReadTool is a versatile component of the crewai_tools package, designed to streamline the process of reading and retrieving content from files. It is particularly useful in scenarios such as batch text file processing, runtime configuration file reading, and data importation for analytics. This tool supports various text-based file formats including `.txt`, `.csv`, `.json`, and adapts its functionality based on the file type, for instance, converting JSON content into a Python dictionary for easy use.
|
||||
|
||||
The tool also supports reading specific chunks of a file by specifying a starting line and the number of lines to read, which is helpful when working with large files that don't need to be loaded entirely into memory.
|
||||
|
||||
## Installation
|
||||
|
||||
Install the crewai_tools package to use the FileReadTool in your projects:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
To get started with the FileReadTool:
|
||||
|
||||
```python
|
||||
from crewai_tools import FileReadTool
|
||||
|
||||
# Initialize the tool to read any file the agent knows or learns the path for
|
||||
file_read_tool = FileReadTool()
|
||||
|
||||
# OR
|
||||
|
||||
# Initialize the tool with a specific file path, so the agent can only read the content of the specified file
|
||||
file_read_tool = FileReadTool(file_path='path/to/your/file.txt')
|
||||
|
||||
# Read a specific chunk of the file (lines 100-149)
|
||||
partial_content = file_read_tool.run(file_path='path/to/your/file.txt', start_line=100, line_count=50)
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `file_path`: The path to the file you want to read. It accepts both absolute and relative paths. Ensure the file exists and you have the necessary permissions to access it.
|
||||
- `start_line`: (Optional) The line number to start reading from (1-indexed). Defaults to 1 (the first line).
|
||||
- `line_count`: (Optional) The number of lines to read. If not provided, reads from the start_line to the end of the file.
|
||||
97
crewai_tools/tools/file_read_tool/file_read_tool.py
Normal file
97
crewai_tools/tools/file_read_tool/file_read_tool.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class FileReadToolSchema(BaseModel):
    """Input for FileReadTool."""

    # Required: path of the file to read.
    file_path: str = Field(
        ...,
        description="Mandatory file full path to read the file",
    )
    # Optional: first line to return, counted from 1.
    start_line: Optional[int] = Field(
        default=1,
        description="Line number to start reading from (1-indexed)",
    )
    # Optional: how many lines to return; None means no upper bound.
    line_count: Optional[int] = Field(
        default=None,
        description="Number of lines to read. If None, reads the entire file",
    )
|
||||
|
||||
|
||||
class FileReadTool(BaseTool):
    """A tool for reading file contents.

    This tool inherits its schema handling from BaseTool to avoid recursive schema
    definition issues. The args_schema is set to FileReadToolSchema which defines
    the required file_path parameter. The schema should not be overridden in the
    constructor as it would break the inheritance chain and cause infinite loops.

    The tool supports two ways of specifying the file path:
    1. At construction time via the file_path parameter
    2. At runtime via the file_path parameter in the tool's input

    Args:
        file_path (Optional[str]): Path to the file to be read. If provided,
            this becomes the default file path for the tool.
        **kwargs: Additional keyword arguments passed to BaseTool.

    Example:
        >>> tool = FileReadTool(file_path="/path/to/file.txt")
        >>> content = tool.run()  # Reads /path/to/file.txt
        >>> content = tool.run(file_path="/path/to/other.txt")  # Reads other.txt
        >>> content = tool.run(file_path="/path/to/file.txt", start_line=100, line_count=50)  # Reads lines 100-149
    """

    name: str = "Read a file's content"
    description: str = "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read. Optionally, provide 'start_line' to start reading from a specific line and 'line_count' to limit the number of lines read."
    args_schema: Type[BaseModel] = FileReadToolSchema
    file_path: Optional[str] = None

    def __init__(self, file_path: Optional[str] = None, **kwargs: Any) -> None:
        """Initialize the FileReadTool.

        Args:
            file_path (Optional[str]): Path to the file to be read. If provided,
                this becomes the default file path for the tool.
            **kwargs: Additional keyword arguments passed to BaseTool.
        """
        if file_path is not None:
            # Advertise the default file in the description; this replaces any
            # description passed through kwargs.
            kwargs["description"] = (
                f"A tool that reads file content. The default file is {file_path}, but you can provide a different 'file_path' parameter to read another file. You can also specify 'start_line' and 'line_count' to read specific parts of the file."
            )

        super().__init__(**kwargs)
        self.file_path = file_path

    def _run(
        self,
        file_path: Optional[str] = None,
        start_line: Optional[int] = 1,
        line_count: Optional[int] = None,
    ) -> str:
        """Read a whole file or a window of its lines.

        Args:
            file_path: File to read; falls back to the path given at construction.
            start_line: 1-indexed first line to return. Falsy values mean 1.
            line_count: Number of lines to return. Falsy values (None, 0) mean
                "until the end of the file".

        Returns:
            The requested text, or a string starting with "Error:" when no path
            is available, the file cannot be read, or start_line lies beyond
            the end of the file.
        """
        # Local import: keeps the dependency inside this block.
        from itertools import islice

        file_path = file_path or self.file_path
        start_line = start_line or 1
        line_count = line_count or None

        if file_path is None:
            return (
                "Error: No file path provided. Please provide a file path either in the constructor or as an argument."
            )

        try:
            with open(file_path, "r") as file:
                if start_line == 1 and line_count is None:
                    return file.read()

                start_idx = max(start_line - 1, 0)
                # A negative count selects nothing, exactly like the previous
                # index comparison did; clamp it so islice accepts the bound.
                stop_idx = (
                    None if line_count is None else start_idx + max(line_count, 0)
                )

                # islice stops consuming the file once the window is complete,
                # so the rest of a large file is never read or decoded.
                selected_lines = list(islice(file, start_idx, stop_idx))

                if not selected_lines and start_idx > 0:
                    return f"Error: Start line {start_line} exceeds the number of lines in the file."

                return "".join(selected_lines)
        except FileNotFoundError:
            return f"Error: File not found at path: {file_path}"
        except PermissionError:
            return f"Error: Permission denied when trying to read file: {file_path}"
        except Exception as e:
            return f"Error: Failed to read file {file_path}. {str(e)}"
|
||||
35
crewai_tools/tools/file_writer_tool/README.md
Normal file
35
crewai_tools/tools/file_writer_tool/README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
Here's the rewritten README for the `FileWriterTool`:
|
||||
|
||||
# FileWriterTool Documentation
|
||||
|
||||
## Description
|
||||
The `FileWriterTool` is a component of the crewai_tools package, designed to simplify the process of writing content to files. It is particularly useful in scenarios such as generating reports, saving logs, creating configuration files, and more. This tool supports creating new directories if they don't exist, making it easier to organize your output.
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package to use the `FileWriterTool` in your projects:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
To get started with the `FileWriterTool`:
|
||||
|
||||
```python
|
||||
from crewai_tools import FileWriterTool
|
||||
|
||||
# Initialize the tool
|
||||
file_writer_tool = FileWriterTool()
|
||||
|
||||
# Write content to a file in a specified directory
|
||||
result = file_writer_tool._run(filename='example.txt', content='This is a test content.', directory='test_directory', overwrite=False)
|
||||
print(result)
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `filename`: The name of the file you want to create or overwrite.
|
||||
- `content`: The content to write into the file.
|
||||
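- `directory` (optional): The path to the directory where the file will be created. Defaults to the current directory (`.`). If the directory does not exist, it will be created.
- `overwrite` (optional): Whether an existing file may be replaced. Accepts a boolean or a string such as `"true"`/`"false"`, `"yes"`/`"no"`, `"1"`/`"0"`. The input schema defaults it to `False`, in which case an existing file is left untouched.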
|
||||
|
||||
## Conclusion
|
||||
By integrating the `FileWriterTool` into your crews, the agents can execute the process of writing content to files and creating directories. This tool is essential for tasks that require saving output data, creating structured file systems, and more. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is straightforward and efficient.
|
||||
62
crewai_tools/tools/file_writer_tool/file_writer_tool.py
Normal file
62
crewai_tools/tools/file_writer_tool/file_writer_tool.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import os
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
def strtobool(val) -> bool:
    """Convert a yes/no style flag into a real ``bool``.

    Booleans are returned unchanged. Any other value is converted with
    ``str()``, stripped of surrounding whitespace and compared
    case-insensitively against the accepted spellings, so values such as
    ``" True\n"`` or the integer ``1`` are understood as well.

    Args:
        val: A bool, or a value whose string form is one of
            y/yes/t/true/on/1 (true) or n/no/f/false/off/0 (false).

    Returns:
        The boolean the value stands for.

    Raises:
        ValueError: If the value matches none of the accepted spellings.
    """
    if isinstance(val, bool):
        return val

    # str() keeps non-string input (ints, None, ...) on the ValueError path
    # instead of failing with AttributeError on .lower().
    normalized = str(val).strip().lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid value to cast to bool: {normalized!r}")
|
||||
|
||||
|
||||
class FileWriterToolInput(BaseModel):
    # Input schema for FileWriterTool.

    # Name of the file to write; the tool joins it onto `directory`.
    filename: str
    # Directory to write into; the tool creates it when it does not exist.
    directory: Optional[str] = "./"
    # Overwrite flag; may be a bool or a string that the tool parses with strtobool.
    overwrite: str | bool = False
    # Text written to the file.
    content: str
|
||||
|
||||
|
||||
class FileWriterTool(BaseTool):
    """Tool that writes text content to a file, creating the directory if needed."""

    name: str = "File Writer Tool"
    description: str = (
        "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
    )
    args_schema: Type[BaseModel] = FileWriterToolInput

    def _run(self, **kwargs: Any) -> str:
        """Write ``content`` to ``directory/filename``.

        Keyword Args:
            filename: Name of the file to write (required).
            content: Text to write (required).
            directory: Target directory; created when missing. Optional.
            overwrite: Bool or bool-like string; when omitted it is treated as
                False, matching the input schema default.

        Returns:
            A success message, or a message describing why nothing was written
            (file already exists, missing key, or any other error).
        """
        try:
            # Create the directory if it doesn't exist; exist_ok covers the case
            # where it appears between the check and the call.
            directory = kwargs.get("directory")
            if directory and not os.path.exists(directory):
                os.makedirs(directory, exist_ok=True)

            # Construct the full path
            filepath = os.path.join(directory or "", kwargs["filename"])

            # Convert overwrite to boolean (absent flag means "do not overwrite")
            overwrite = strtobool(kwargs.get("overwrite", False))

            # Resolve the content before the file is opened, so a missing key
            # cannot leave an empty file behind or truncate an existing one.
            content = kwargs["content"]

            already_exists = (
                f"File {filepath} already exists and overwrite option was not passed."
            )

            # Check if file exists and overwrite is not allowed
            if os.path.exists(filepath) and not overwrite:
                return already_exists

            # "x" fails if the file appeared after the check above.
            mode = "w" if overwrite else "x"
            try:
                with open(filepath, mode) as file:
                    file.write(content)
            except FileExistsError:
                return already_exists
            return f"Content successfully written to {filepath}"
        except KeyError as e:
            return f"An error occurred while accessing key: {str(e)}"
        except Exception as e:
            return f"An error occurred while writing to the file: {str(e)}"
|
||||
@@ -0,0 +1,138 @@
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.file_writer_tool.file_writer_tool import FileWriterTool
|
||||
|
||||
|
||||
@pytest.fixture
def tool():
    """Provide a fresh FileWriterTool for each test."""
    writer = FileWriterTool()
    return writer
|
||||
|
||||
|
||||
@pytest.fixture
def temp_env():
    """Yield a scratch directory plus a sample file name and content.

    The directory is removed again once the test has finished.
    """
    scratch_dir = tempfile.mkdtemp()
    environment = {
        "temp_dir": scratch_dir,
        "test_file": "test.txt",
        "test_content": "Hello, World!",
    }

    yield environment

    shutil.rmtree(scratch_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def get_test_path(filename, directory):
    """Return the path of ``filename`` inside ``directory``."""
    full_path = os.path.join(directory, filename)
    return full_path
|
||||
|
||||
|
||||
def read_file(path):
    """Return the full text content of the file at ``path``."""
    with open(path, "r") as handle:
        contents = handle.read()
    return contents
|
||||
|
||||
|
||||
def test_basic_file_write(tool, temp_env):
    """Writing into an existing directory creates the file with the given content."""
    file_name = temp_env["test_file"]
    target_dir = temp_env["temp_dir"]
    expected = temp_env["test_content"]

    result = tool._run(
        filename=file_name,
        directory=target_dir,
        content=expected,
        overwrite=True,
    )

    written_path = get_test_path(file_name, target_dir)
    assert os.path.exists(written_path)
    assert read_file(written_path) == expected
    assert "successfully written" in result
|
||||
|
||||
|
||||
def test_directory_creation(tool, temp_env):
    """A missing target directory is created before the file is written."""
    file_name = temp_env["test_file"]
    nested_dir = os.path.join(temp_env["temp_dir"], "nested_dir")

    result = tool._run(
        filename=file_name,
        directory=nested_dir,
        content=temp_env["test_content"],
        overwrite=True,
    )

    written_path = get_test_path(file_name, nested_dir)
    assert os.path.exists(nested_dir)
    assert os.path.exists(written_path)
    assert "successfully written" in result
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "overwrite",
    ["y", "yes", "t", "true", "on", "1", True],
)
def test_overwrite_true(tool, temp_env, overwrite):
    """Every truthy spelling of the overwrite flag replaces an existing file."""
    file_name = temp_env["test_file"]
    target_dir = temp_env["temp_dir"]
    existing_path = get_test_path(file_name, target_dir)
    with open(existing_path, "w") as handle:
        handle.write("Original content")

    result = tool._run(
        filename=file_name,
        directory=target_dir,
        content="New content",
        overwrite=overwrite,
    )

    assert read_file(existing_path) == "New content"
    assert "successfully written" in result
|
||||
|
||||
|
||||
def test_invalid_overwrite_value(tool, temp_env):
    """An unparseable overwrite flag is reported in the returned message."""
    call_args = {
        "filename": temp_env["test_file"],
        "directory": temp_env["temp_dir"],
        "content": temp_env["test_content"],
        "overwrite": "invalid",
    }

    result = tool._run(**call_args)

    assert "invalid value" in result
|
||||
|
||||
|
||||
def test_missing_required_fields(tool, temp_env):
    """Omitting ``filename`` yields the key-access error message."""
    call_args = {
        "directory": temp_env["temp_dir"],
        "content": temp_env["test_content"],
        "overwrite": True,
    }

    result = tool._run(**call_args)

    assert "An error occurred while accessing key: 'filename'" in result
|
||||
|
||||
|
||||
def test_empty_content(tool, temp_env):
    """Empty content still produces an (empty) file and a success message."""
    file_name = temp_env["test_file"]
    target_dir = temp_env["temp_dir"]

    result = tool._run(
        filename=file_name,
        directory=target_dir,
        content="",
        overwrite=True,
    )

    written_path = get_test_path(file_name, target_dir)
    assert os.path.exists(written_path)
    assert read_file(written_path) == ""
    assert "successfully written" in result
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "overwrite",
    ["n", "no", "f", "false", "off", "0", False],
)
def test_file_exists_error_handling(tool, temp_env, overwrite):
    """Every falsy spelling of the overwrite flag leaves an existing file untouched."""
    file_name = temp_env["test_file"]
    target_dir = temp_env["temp_dir"]
    existing_path = get_test_path(file_name, target_dir)
    with open(existing_path, "w") as handle:
        handle.write("Pre-existing content")

    result = tool._run(
        filename=file_name,
        directory=target_dir,
        content="Should not be written",
        overwrite=overwrite,
    )

    assert "already exists and overwrite option was not passed" in result
    assert read_file(existing_path) == "Pre-existing content"
|
||||
119
crewai_tools/tools/files_compressor_tool/README.md
Normal file
119
crewai_tools/tools/files_compressor_tool/README.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# 📦 FileCompressorTool
|
||||
|
||||
The **FileCompressorTool** is a utility for compressing individual files or entire directories (including nested subdirectories) into different archive formats, such as `.zip` or `.tar` (including `.tar.gz`, `.tar.bz2`, and `.tar.xz`). This tool is useful for archiving logs, documents, datasets, or backups in a compact format, and ensures flexibility in how the archives are created.
|
||||
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
This tool:
|
||||
- Accepts a **file or directory** as input.
|
||||
- Supports **recursive compression** of subdirectories.
|
||||
- Lets you define a **custom output archive path** or defaults to the current directory.
|
||||
- Handles **overwrite protection** to avoid unintentional data loss.
|
||||
- Supports multiple compression formats: `.zip`, `.tar`, `.tar.gz`, `.tar.bz2`, and `.tar.xz`.
|
||||
|
||||
---
|
||||
|
||||
## Arguments
|
||||
|
||||
| Argument | Type | Required | Description |
|
||||
|---------------|-----------|----------|-----------------------------------------------------------------------------|
|
||||
| `input_path` | `str` | ✅ | Path to the file or directory you want to compress. |
|
||||
| `output_path` | `str` | ❌ | Optional path for the resulting archive file. Defaults to `./<name>.<format>`. |
|
||||
| `overwrite` | `bool` | ❌ | Whether to overwrite an existing archive file. Defaults to `False`. |
|
||||
| `format` | `str` | ❌ | Compression format to use. Can be one of `zip`, `tar`, `tar.gz`, `tar.bz2`, `tar.xz`. Defaults to `zip`. |
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Usage Example
|
||||
|
||||
```python
|
||||
from crewai_tools import FileCompressorTool
|
||||
|
||||
# Initialize the tool
|
||||
tool = FileCompressorTool()
|
||||
|
||||
# Compress a directory with subdirectories and files into a zip archive
|
||||
result = tool._run(
|
||||
input_path="./data/project_docs", # Folder containing subfolders & files
|
||||
output_path="./output/project_docs.zip", # Optional output path; its extension must match the format (zip by default)
|
||||
overwrite=True # Allow overwriting if file exists
|
||||
)
|
||||
print(result)
|
||||
# Example output: Successfully compressed './data/project_docs' into './output/project_docs.zip'
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Example Scenarios
|
||||
|
||||
### Compress a single file into a zip archive:
|
||||
```python
|
||||
# Compress a single file into a zip archive
|
||||
result = tool._run(input_path="report.pdf")
|
||||
# Example output: Successfully compressed 'report.pdf' into './report.zip'
|
||||
```
|
||||
|
||||
### Compress a directory with nested folders into a zip archive:
|
||||
```python
|
||||
# Compress a directory containing nested subdirectories and files
|
||||
result = tool._run(input_path="./my_data", overwrite=True)
|
||||
# Example output: Successfully compressed 'my_data' into './my_data.zip'
|
||||
```
|
||||
|
||||
### Use a custom output path with a zip archive:
|
||||
```python
|
||||
# Compress a directory and specify a custom zip output location
|
||||
result = tool._run(input_path="./my_data", output_path="./backups/my_data_backup.zip", overwrite=True)
|
||||
# Example output: Successfully compressed 'my_data' into './backups/my_data_backup.zip'
|
||||
```
|
||||
|
||||
### Prevent overwriting an existing zip file:
|
||||
```python
|
||||
# Try to compress a directory without overwriting an existing zip file
|
||||
result = tool._run(input_path="./my_data", output_path="./backups/my_data_backup.zip", overwrite=False)
|
||||
# Example output: Output './backups/my_data_backup.zip' already exists and overwrite is set to False.
|
||||
```
|
||||
|
||||
### Compress into a tar archive:
|
||||
```python
|
||||
# Compress a directory into a tar archive
|
||||
result = tool._run(input_path="./my_data", format="tar", overwrite=True)
|
||||
# Example output: Successfully compressed 'my_data' into './my_data.tar'
|
||||
```
|
||||
|
||||
### Compress into a tar.gz archive:
|
||||
```python
|
||||
# Compress a directory into a tar.gz archive
|
||||
result = tool._run(input_path="./my_data", format="tar.gz", overwrite=True)
|
||||
# Example output: Successfully compressed 'my_data' into './my_data.tar.gz'
|
||||
```
|
||||
|
||||
### Compress into a tar.bz2 archive:
|
||||
```python
|
||||
# Compress a directory into a tar.bz2 archive
|
||||
result = tool._run(input_path="./my_data", format="tar.bz2", overwrite=True)
|
||||
# Example output: Successfully compressed 'my_data' into './my_data.tar.bz2'
|
||||
```
|
||||
|
||||
### Compress into a tar.xz archive:
|
||||
```python
|
||||
# Compress a directory into a tar.xz archive
|
||||
result = tool._run(input_path="./my_data", format="tar.xz", overwrite=True)
|
||||
# Example output: Successfully compressed 'my_data' into './my_data.tar.xz'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Error Handling and Validations
|
||||
|
||||
- **File Extension Validation**: The tool ensures that the output file extension matches the selected format (e.g., `.zip` for `zip` format, `.tar` for `tar` format, etc.).
|
||||
- **File/Directory Existence**: If the input path does not exist, an error message will be returned.
|
||||
- **Overwrite Protection**: If a file already exists at the output path, the tool checks the `overwrite` flag before proceeding. If `overwrite=False`, it prevents overwriting the existing file.
|
||||
|
||||
---
|
||||
|
||||
This tool provides a flexible and robust way to handle file and directory compression across multiple formats for efficient storage and backups.
|
||||
@@ -0,0 +1,117 @@
|
||||
import os
|
||||
import zipfile
|
||||
import tarfile
|
||||
from typing import Type, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
|
||||
class FileCompressorToolInput(BaseModel):
    """Input schema for FileCompressorTool."""

    # Required: what to archive.
    input_path: str = Field(..., description="Path to the file or directory to compress.")
    # Optional: where to write the archive; when omitted the tool derives a
    # name from the input and places it in the current working directory.
    output_path: Optional[str] = Field(default=None, description="Optional output archive filename.")
    # Optional: an existing archive is only replaced when this is True.
    overwrite: bool = Field(default=False, description="Whether to overwrite the archive if it already exists.")
    # Optional: archive type; the output file extension must match it.
    format: str = Field(default="zip", description="Compression format ('zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz').")
|
||||
|
||||
|
||||
class FileCompressorTool(BaseTool):
    """Tool that archives a file or directory as zip or (optionally compressed) tar."""

    name: str = "File Compressor Tool"
    # The description lists every format _run accepts, so callers are not told
    # that only zip is available.
    description: str = (
        "Compresses a file or directory into an archive "
        "(.zip, .tar, .tar.gz, .tar.bz2 or .tar.xz). "
        "Useful for archiving logs, documents, or backups."
    )
    args_schema: Type[BaseModel] = FileCompressorToolInput

    def _run(self, input_path: str, output_path: Optional[str] = None, overwrite: bool = False, format: str = "zip") -> str:
        """Compress ``input_path`` into an archive and report the outcome.

        Args:
            input_path: Existing file or directory to archive.
            output_path: Archive path. When omitted, a name derived from the
                input is used inside the current working directory.
            overwrite: Whether an existing archive may be replaced.
            format: One of 'zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz'.

        Returns:
            A success message, or a message describing why nothing was
            compressed (missing input, unsupported format, extension mismatch,
            existing output without overwrite, or an error during compression).
        """
        if not os.path.exists(input_path):
            return f"Input path '{input_path}' does not exist."

        if not output_path:
            output_path = self._generate_output_path(input_path, format)

        FORMAT_EXTENSION = {
            "zip": ".zip",
            "tar": ".tar",
            "tar.gz": ".tar.gz",
            "tar.bz2": ".tar.bz2",
            "tar.xz": ".tar.xz",
        }

        if format not in FORMAT_EXTENSION:
            return f"Compression format '{format}' is not supported. Allowed formats: {', '.join(FORMAT_EXTENSION.keys())}"
        elif not output_path.endswith(FORMAT_EXTENSION[format]):
            return f"Error: If '{format}' format is chosen, output file must have a '{FORMAT_EXTENSION[format]}' extension."

        if not self._prepare_output(output_path, overwrite):
            return f"Output '{output_path}' already exists and overwrite is set to False."

        try:
            # Every supported non-zip format is a tar variant.
            if format == "zip":
                self._compress_zip(input_path, output_path)
            else:
                self._compress_tar(input_path, output_path, format)

            return f"Successfully compressed '{input_path}' into '{output_path}'"
        except FileNotFoundError:
            return f"Error: File not found at path: {input_path}"
        except PermissionError:
            return f"Error: Permission denied when accessing '{input_path}' or writing '{output_path}'"
        except Exception as e:
            return f"An unexpected error occurred during compression: {str(e)}"

    def _generate_output_path(self, input_path: str, format: str) -> str:
        """Generates output path based on input path and format.

        Files lose their own extension; directories keep their name. The
        result is placed in the current working directory.
        """
        if os.path.isfile(input_path):
            base_name = os.path.splitext(os.path.basename(input_path))[0]  # Remove extension
        else:
            base_name = os.path.basename(os.path.normpath(input_path))  # Directory name
        return os.path.join(os.getcwd(), f"{base_name}.{format}")

    def _prepare_output(self, output_path: str, overwrite: bool) -> bool:
        """Ensures output path is ready for writing.

        Creates the parent directory when it is missing.

        Returns:
            False when the archive already exists and ``overwrite`` is not set,
            True otherwise.
        """
        output_dir = os.path.dirname(output_path)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir)
        if os.path.exists(output_path) and not overwrite:
            return False
        return True

    def _compress_zip(self, input_path: str, output_path: str):
        """Compresses input into a zip archive.

        A single file is stored under its base name; a directory is walked and
        each file is stored under its path relative to that directory.
        """
        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            if os.path.isfile(input_path):
                zipf.write(input_path, os.path.basename(input_path))
            else:
                for root, _, files in os.walk(input_path):
                    for file in files:
                        full_path = os.path.join(root, file)
                        arcname = os.path.relpath(full_path, start=input_path)
                        zipf.write(full_path, arcname)

    def _compress_tar(self, input_path: str, output_path: str, format: str):
        """Compresses input into a tar archive with the given format.

        Raises:
            ValueError: If ``format`` is not one of the tar variants.
        """
        format_mode = {
            "tar": "w",
            "tar.gz": "w:gz",
            "tar.bz2": "w:bz2",
            "tar.xz": "w:xz",
        }

        if format not in format_mode:
            raise ValueError(f"Unsupported tar format: {format}")

        mode = format_mode[format]

        with tarfile.open(output_path, mode) as tarf:
            # normpath drops a trailing separator; without it the base name of
            # "my_data/" is empty and the archive loses its top-level folder.
            arcname = os.path.basename(os.path.normpath(input_path))
            tarf.add(input_path, arcname=arcname)
|
||||
@@ -0,0 +1,93 @@
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from crewai_tools.tools.files_compressor_tool import FileCompressorTool
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
@pytest.fixture
def tool():
    """Provide a fresh FileCompressorTool for each test."""
    compressor = FileCompressorTool()
    return compressor
|
||||
|
||||
@patch("os.path.exists", return_value=False)
def test_input_path_does_not_exist(mock_exists, tool):
    """A missing input path is reported instead of being compressed."""
    outcome = tool._run("nonexistent_path")

    assert "does not exist" in outcome
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("os.getcwd", return_value="/mocked/cwd")
@patch.object(FileCompressorTool, "_compress_zip")  # Mock actual compression
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_generate_output_path_default(mock_prepare, mock_compress, mock_cwd, mock_exists, tool):
    """Without output_path the archive is named after the input inside the cwd."""
    # "mydir" is not an existing file, so the name is taken as a directory name.
    expected_output = os.path.join("/mocked/cwd", "mydir.zip")

    result = tool._run(input_path="mydir", format="zip")

    assert "Successfully compressed" in result
    # Pin the generated default path, which is what this test is named for.
    assert expected_output in result
    mock_compress.assert_called_once_with("mydir", expected_output)
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch.object(FileCompressorTool, "_compress_zip")
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_zip_compression(mock_prepare, mock_compress, mock_exists, tool):
    """The zip format is routed to the zip compressor exactly once."""
    outcome = tool._run(
        input_path="some/path",
        output_path="archive.zip",
        format="zip",
        overwrite=True,
    )

    assert "Successfully compressed" in outcome
    mock_compress.assert_called_once()
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch.object(FileCompressorTool, "_compress_tar")
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_tar_gz_compression(mock_prepare, mock_compress, mock_exists, tool):
    """The tar.gz format is routed to the tar compressor exactly once."""
    outcome = tool._run(
        input_path="some/path",
        output_path="archive.tar.gz",
        format="tar.gz",
        overwrite=True,
    )

    assert "Successfully compressed" in outcome
    mock_compress.assert_called_once()
|
||||
|
||||
@pytest.mark.parametrize("format", ["tar", "tar.bz2", "tar.xz"])
@patch("os.path.exists", return_value=True)
@patch.object(FileCompressorTool, "_compress_tar")
@patch.object(FileCompressorTool, "_prepare_output", return_value=True)
def test_other_tar_formats(mock_prepare, mock_compress, mock_exists, format, tool):
    """The remaining tar variants are routed to the tar compressor exactly once."""
    archive_name = f"archive.{format}"

    outcome = tool._run(
        input_path="path/to/input",
        output_path=archive_name,
        format=format,
        overwrite=True,
    )

    assert "Successfully compressed" in outcome
    mock_compress.assert_called_once()
|
||||
|
||||
@pytest.mark.parametrize("format", ["rar", "7z"])
@patch("os.path.exists", return_value=True)  # input_path must look like it exists
def test_unsupported_format(mock_exists, tool, format):
    """Formats outside the supported set are rejected with a message."""
    archive_name = f"archive.{format}"

    outcome = tool._run(input_path="some/path", output_path=archive_name, format=format)

    assert "not supported" in outcome
|
||||
|
||||
@patch("os.path.exists", return_value=True)
def test_extension_mismatch(mock_exists, tool):
    """An output extension that disagrees with the format is rejected."""
    outcome = tool._run(
        input_path="some/path",
        output_path="archive.zip",
        format="tar.gz",
    )

    assert "must have a '.tar.gz' extension" in outcome
|
||||
|
||||
# A single os.path.exists patch is enough: it makes both the input path and the
# output archive look present, and os.path.isfile is never reached on this path.
@patch("os.path.exists", return_value=True)
def test_existing_output_no_overwrite(mock_exists, tool):
    """An existing archive is not replaced when overwrite is False."""
    result = tool._run(input_path="some/path", output_path="archive.zip", format="zip", overwrite=False)

    assert "overwrite is set to False" in result
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("zipfile.ZipFile", side_effect=PermissionError)
def test_permission_error(mock_zip, mock_exists, tool):
    """A PermissionError while creating the zip becomes a readable message."""
    outcome = tool._run(
        input_path="file.txt",
        output_path="file.zip",
        format="zip",
        overwrite=True,
    )

    assert "Permission denied" in outcome
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("zipfile.ZipFile", side_effect=FileNotFoundError)
def test_file_not_found_during_zip(mock_zip, mock_exists, tool):
    """A FileNotFoundError while creating the zip becomes a readable message."""
    outcome = tool._run(
        input_path="file.txt",
        output_path="file.zip",
        format="zip",
        overwrite=True,
    )

    assert "File not found" in outcome
|
||||
|
||||
@patch("os.path.exists", return_value=True)
@patch("zipfile.ZipFile", side_effect=Exception("Unexpected"))
def test_general_exception_during_zip(mock_zip, mock_exists, tool):
    """Any other exception while creating the zip is reported as unexpected."""
    outcome = tool._run(
        input_path="file.txt",
        output_path="file.zip",
        format="zip",
        overwrite=True,
    )

    assert "unexpected error" in outcome
|
||||
|
||||
# The parent directory of the archive must be created when it is absent.
@patch("os.makedirs")
@patch("os.path.exists", return_value=False)
def test_prepare_output_makes_dir(mock_exists, mock_makedirs):
    """_prepare_output creates the missing directory and reports readiness."""
    compressor = FileCompressorTool()

    ready = compressor._prepare_output("some/missing/path/file.zip", overwrite=True)

    assert ready is True
    mock_makedirs.assert_called_once()
|
||||
60
crewai_tools/tools/firecrawl_crawl_website_tool/README.md
Normal file
60
crewai_tools/tools/firecrawl_crawl_website_tool/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# FirecrawlCrawlWebsiteTool
|
||||
|
||||
## Description
|
||||
|
||||
[Firecrawl](https://firecrawl.dev) is a platform for crawling any website and converting it into clean markdown or structured data.
|
||||
|
||||
## Version Compatibility
|
||||
|
||||
This implementation is compatible with FireCrawl API v1
|
||||
|
||||
## Installation
|
||||
|
||||
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
|
||||
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package:
|
||||
|
||||
```
|
||||
pip install firecrawl-py 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Use the `FirecrawlCrawlWebsiteTool` as follows to allow your agent to crawl websites:
|
||||
|
||||
```python
|
||||
from crewai_tools import FirecrawlCrawlWebsiteTool
|
||||
from firecrawl import ScrapeOptions
|
||||
|
||||
tool = FirecrawlCrawlWebsiteTool(
|
||||
config={
|
||||
"limit": 100,
|
||||
"scrape_options": ScrapeOptions(formats=["markdown", "html"]),
|
||||
"poll_interval": 30,
|
||||
}
|
||||
)
|
||||
tool.run(url="firecrawl.dev")
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
|
||||
- `config`: Optional. It contains Firecrawl API parameters.
|
||||
|
||||
This is the default configuration
|
||||
|
||||
```python
|
||||
from firecrawl import ScrapeOptions
|
||||
|
||||
{
|
||||
"max_depth": 2,
|
||||
"ignore_sitemap": True,
|
||||
"limit": 100,
|
||||
"allow_backward_links": False,
|
||||
"allow_external_links": False,
|
||||
"scrape_options": ScrapeOptions(
|
||||
formats=["markdown", "screenshot", "links"],
|
||||
only_main_content=True,
|
||||
timeout=30000,
|
||||
),
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,115 @@
|
||||
from typing import Any, Optional, Type, List, TYPE_CHECKING
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||
|
||||
# Import for static type checkers only.
if TYPE_CHECKING:
    from firecrawl import FirecrawlApp

# Runtime import guard: record whether the optional firecrawl package is
# importable instead of failing at import time.
try:
    from firecrawl import FirecrawlApp
except ImportError:
    FIRECRAWL_AVAILABLE = False
else:
    FIRECRAWL_AVAILABLE = True
|
||||
|
||||
|
||||
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
    # Input schema for FirecrawlCrawlWebsiteTool. The only argument is the
    # website URL, which has no default and is therefore required.
    url: str = Field(description="Website URL")
|
||||
|
||||
|
||||
class FirecrawlCrawlWebsiteTool(BaseTool):
    """
    Tool for crawling websites using Firecrawl. To run this tool, you need to have a Firecrawl API key.

    Args:
        api_key (str): Your Firecrawl API key.
        config (dict): Optional. It contains Firecrawl API parameters. The dict
            is forwarded unchanged as `params` to `FirecrawlApp.crawl_url`.

    Default configuration options (exactly the keys set by the `config`
    default factory below):
        maxDepth (int): Maximum depth to crawl. Default: 2
        ignoreSitemap (bool): Whether to ignore sitemap. Default: True
        limit (int): Maximum number of pages to crawl. Default: 10
        allowBackwardLinks (bool): Allow crawling backward links. Default: False
        allowExternalLinks (bool): Allow crawling external links. Default: False
        scrapeOptions (dict): Options for scraping content
            - formats (list[str]): Content formats to return. Default: ["markdown", "screenshot", "links"]
            - onlyMainContent (bool): Only return main content. Default: True
            - timeout (int): Timeout in milliseconds. Default: 10000
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True, validate_assignment=True, frozen=False
    )
    name: str = "Firecrawl web crawl tool"
    description: str = "Crawl webpages using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
    api_key: Optional[str] = None
    config: Optional[dict[str, Any]] = Field(
        default_factory=lambda: {
            "maxDepth": 2,
            "ignoreSitemap": True,
            "limit": 10,
            "allowBackwardLinks": False,
            "allowExternalLinks": False,
            "scrapeOptions": {
                "formats": ["markdown", "screenshot", "links"],
                "onlyMainContent": True,
                "timeout": 10000,
            },
        }
    )
    # Client instance; created in `_initialize_firecrawl`, None until then.
    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
    package_dependencies: List[str] = ["firecrawl-py"]
    env_vars: List[EnvVar] = [
        EnvVar(name="FIRECRAWL_API_KEY", description="API key for Firecrawl services", required=True),
    ]

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        """Build the tool and immediately create the Firecrawl client.

        Args:
            api_key: Firecrawl API key handed to `FirecrawlApp`. When None the
                value is passed through as-is (presumably the SDK then falls
                back to `FIRECRAWL_API_KEY` -- confirm against the SDK).
            **kwargs: Remaining model fields (e.g. `config`).
        """
        super().__init__(**kwargs)
        self.api_key = api_key
        self._initialize_firecrawl()

    def _initialize_firecrawl(self) -> None:
        """Create `self._firecrawl`, offering to install `firecrawl-py` if it is missing.

        Raises:
            ImportError: If the package is missing and the user declines the
                install, or if the `uv add` installation fails.
        """
        try:
            from firecrawl import FirecrawlApp  # type: ignore

            self._firecrawl = FirecrawlApp(api_key=self.api_key)
        except ImportError:
            import click

            if click.confirm(
                "You are missing the 'firecrawl-py' package. Would you like to install it?"
            ):
                import subprocess

                try:
                    subprocess.run(["uv", "add", "firecrawl-py"], check=True)
                    from firecrawl import FirecrawlApp

                    self._firecrawl = FirecrawlApp(api_key=self.api_key)
                except subprocess.CalledProcessError as err:
                    # Chain the installer failure so its exit status is not lost.
                    raise ImportError("Failed to install firecrawl-py package") from err
            else:
                raise ImportError(
                    "`firecrawl-py` package not found, please run `uv add firecrawl-py`"
                )

    def _run(self, url: str):
        """Crawl `url` with the configured parameters and return the SDK's result.

        Raises:
            RuntimeError: If the Firecrawl client was never initialized.
        """
        if not self._firecrawl:
            raise RuntimeError("FirecrawlApp not properly initialized")

        return self._firecrawl.crawl_url(url, poll_interval=2, params=self.config)
|
||||
|
||||
|
||||
# Rebuild the pydantic model once `firecrawl` is importable -- presumably so the
# "FirecrawlApp" string annotation on `_firecrawl` can be resolved.
try:
    from firecrawl import FirecrawlApp

    # The marker attribute keeps the rebuild from running more than once.
    if not hasattr(FirecrawlCrawlWebsiteTool, "_model_rebuilt"):
        FirecrawlCrawlWebsiteTool.model_rebuild()
        FirecrawlCrawlWebsiteTool._model_rebuilt = True
except ImportError:
    # When this tool is not used, then exception can be ignored.
    pass
|
||||
46
crewai_tools/tools/firecrawl_scrape_website_tool/README.md
Normal file
46
crewai_tools/tools/firecrawl_scrape_website_tool/README.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# FirecrawlScrapeWebsiteTool
|
||||
|
||||
## Description
|
||||
|
||||
[Firecrawl](https://firecrawl.dev) is a platform for crawling any website and converting it into clean markdown or structured data.
|
||||
|
||||
## Installation
|
||||
|
||||
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
|
||||
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package:
|
||||
|
||||
```
|
||||
pip install firecrawl-py 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Utilize the FirecrawlScrapeWebsiteTool as follows to allow your agent to load websites:
|
||||
|
||||
```python
|
||||
from crewai_tools import FirecrawlScrapeWebsiteTool
|
||||
|
||||
tool = FirecrawlScrapeWebsiteTool(config={"formats": ['html']})
|
||||
tool.run(url="firecrawl.dev")
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
|
||||
- `config`: Optional. It contains Firecrawl API parameters.
|
||||
|
||||
|
||||
This is the default configuration
|
||||
|
||||
```python
|
||||
{
|
||||
"formats": ["markdown"],
|
||||
"only_main_content": True,
|
||||
"include_tags": [],
|
||||
"exclude_tags": [],
|
||||
"headers": {},
|
||||
"wait_for": 0,
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
from typing import Any, Optional, Type, Dict, List, TYPE_CHECKING
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
try:
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
FIRECRAWL_AVAILABLE = True
|
||||
except ImportError:
|
||||
FIRECRAWL_AVAILABLE = False
|
||||
|
||||
|
||||
# Input schema for FirecrawlScrapeWebsiteTool: the single `url` argument received by `_run`.
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
|
||||
|
||||
|
||||
class FirecrawlScrapeWebsiteTool(BaseTool):
    """
    Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.

    Args:
        api_key (str): Your Firecrawl API key.
        config (dict): Optional. It contains Firecrawl API parameters. The dict
            is forwarded unchanged as `params` to `FirecrawlApp.scrape_url`.

    Default configuration options:
        formats (list[str]): Content formats to return. Default: ["markdown"]
        onlyMainContent (bool): Only return main content. Default: True
        includeTags (list[str]): Tags to include. Default: []
        excludeTags (list[str]): Tags to exclude. Default: []
        headers (dict): Headers to include. Default: {}
        waitFor (int): Time to wait for page to load in ms. Default: 0

    Additional options such as `json_options` (options for JSON extraction) are
    not part of the default config and must be supplied explicitly.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True, validate_assignment=True, frozen=False
    )
    name: str = "Firecrawl web scrape tool"
    description: str = "Scrape webpages using Firecrawl and return the contents"
    args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
    api_key: Optional[str] = None
    config: Dict[str, Any] = Field(
        default_factory=lambda: {
            "formats": ["markdown"],
            "onlyMainContent": True,
            "includeTags": [],
            "excludeTags": [],
            "headers": {},
            "waitFor": 0,
        }
    )

    # Client instance; created in `__init__`, None until then.
    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
    package_dependencies: List[str] = ["firecrawl-py"]
    env_vars: List[EnvVar] = [
        EnvVar(name="FIRECRAWL_API_KEY", description="API key for Firecrawl services", required=True),
    ]

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        """Build the tool and create the Firecrawl client.

        If `firecrawl-py` is not importable the user is asked whether to
        install it with `uv add`.

        Args:
            api_key: Firecrawl API key handed to `FirecrawlApp`.
            **kwargs: Remaining model fields (e.g. `config`).

        Raises:
            ImportError: If the package is missing and the user declines the install.
            subprocess.CalledProcessError: If the `uv add` installation fails.
        """
        super().__init__(**kwargs)
        # Record the key on the public field, as the sibling Firecrawl tools do;
        # previously `api_key` stayed None even when a key was supplied.
        self.api_key = api_key
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            import click

            if click.confirm(
                "You are missing the 'firecrawl-py' package. Would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "firecrawl-py"], check=True)
                from firecrawl import (
                    FirecrawlApp,
                )
            else:
                raise ImportError(
                    "`firecrawl-py` package not found, please run `uv add firecrawl-py`"
                )

        self._firecrawl = FirecrawlApp(api_key=api_key)

    def _run(self, url: str):
        """Scrape `url` with the configured parameters and return the SDK's result.

        Raises:
            RuntimeError: If the Firecrawl client was never initialized.
        """
        if not self._firecrawl:
            raise RuntimeError("FirecrawlApp not properly initialized")

        return self._firecrawl.scrape_url(url, params=self.config)
|
||||
|
||||
|
||||
# Rebuild the pydantic model once `firecrawl` is importable -- presumably so the
# "FirecrawlApp" string annotation on `_firecrawl` can be resolved.
try:
    from firecrawl import FirecrawlApp

    # Must happen after the class is defined; the marker attribute keeps it to one rebuild.
    if not hasattr(FirecrawlScrapeWebsiteTool, "_model_rebuilt"):
        FirecrawlScrapeWebsiteTool.model_rebuild()
        FirecrawlScrapeWebsiteTool._model_rebuilt = True
except ImportError:
    # When this tool is not used, then exception can be ignored.
    pass
|
||||
44
crewai_tools/tools/firecrawl_search_tool/README.md
Normal file
44
crewai_tools/tools/firecrawl_search_tool/README.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# FirecrawlSearchTool
|
||||
|
||||
## Description
|
||||
|
||||
[Firecrawl](https://firecrawl.dev) is a platform for crawling any website and converting it into clean markdown or structured data.
|
||||
|
||||
## Installation
|
||||
|
||||
- Get an API key from [firecrawl.dev](https://firecrawl.dev) and set it in environment variables (`FIRECRAWL_API_KEY`).
|
||||
- Install the [Firecrawl SDK](https://github.com/mendableai/firecrawl) along with `crewai[tools]` package:
|
||||
|
||||
```
|
||||
pip install firecrawl-py 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Utilize the FirecrawlSearchTool as follows to allow your agent to search the web:
|
||||
|
||||
```python
|
||||
from crewai_tools import FirecrawlSearchTool
|
||||
|
||||
tool = FirecrawlSearchTool(config={"limit": 5})
|
||||
tool.run(query="firecrawl web scraping")
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `api_key`: Optional. Specifies the Firecrawl API key. Defaults to the `FIRECRAWL_API_KEY` environment variable.
|
||||
- `config`: Optional. It contains Firecrawl API parameters.
|
||||
|
||||
|
||||
This is the default configuration
|
||||
|
||||
```python
|
||||
{
|
||||
"limit": 5,
|
||||
"tbs": None,
|
||||
"lang": "en",
|
||||
"country": "us",
|
||||
"location": None,
|
||||
"timeout": 60000,
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,119 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Type, List
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
|
||||
try:
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
FIRECRAWL_AVAILABLE = True
|
||||
except ImportError:
|
||||
FIRECRAWL_AVAILABLE = False
|
||||
|
||||
|
||||
# Input schema for FirecrawlSearchTool: the single `query` argument received by `_run`.
class FirecrawlSearchToolSchema(BaseModel):
    query: str = Field(description="Search query")
|
||||
|
||||
|
||||
class FirecrawlSearchTool(BaseTool):
    """
    Tool for searching webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.

    Args:
        api_key (str): Your Firecrawl API key.
        config (dict): Optional. It contains Firecrawl API parameters. The dict
            is forwarded unchanged as `params` to `FirecrawlApp.search`.

    Default configuration options:
        limit (int): Maximum number of pages to crawl. Default: 5
        tbs (str): Time before search. Default: None
        lang (str): Language. Default: "en"
        country (str): Country. Default: "us"
        location (str): Location. Default: None
        timeout (int): Timeout in milliseconds. Default: 60000
    """

    # Declared once; the original repeated this identical assignment twice.
    model_config = ConfigDict(
        arbitrary_types_allowed=True, validate_assignment=True, frozen=False
    )
    name: str = "Firecrawl web search tool"
    description: str = "Search webpages using Firecrawl and return the results"
    args_schema: Type[BaseModel] = FirecrawlSearchToolSchema
    api_key: Optional[str] = None
    config: Optional[dict[str, Any]] = Field(
        default_factory=lambda: {
            "limit": 5,
            "tbs": None,
            "lang": "en",
            "country": "us",
            "location": None,
            "timeout": 60000,
        }
    )
    # Client instance; created in `_initialize_firecrawl`, None until then.
    _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None)
    package_dependencies: List[str] = ["firecrawl-py"]
    env_vars: List[EnvVar] = [
        EnvVar(name="FIRECRAWL_API_KEY", description="API key for Firecrawl services", required=True),
    ]

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        """Build the tool and immediately create the Firecrawl client.

        Args:
            api_key: Firecrawl API key handed to `FirecrawlApp`.
            **kwargs: Remaining model fields (e.g. `config`).
        """
        super().__init__(**kwargs)
        self.api_key = api_key
        self._initialize_firecrawl()

    def _initialize_firecrawl(self) -> None:
        """Create `self._firecrawl`, offering to install `firecrawl-py` if it is missing.

        Raises:
            ImportError: If the package is missing and the user declines the
                install, or if the `uv add` installation fails.
        """
        try:
            from firecrawl import FirecrawlApp  # type: ignore

            self._firecrawl = FirecrawlApp(api_key=self.api_key)
        except ImportError:
            import click

            if click.confirm(
                "You are missing the 'firecrawl-py' package. Would you like to install it?"
            ):
                import subprocess

                try:
                    subprocess.run(["uv", "add", "firecrawl-py"], check=True)
                    from firecrawl import FirecrawlApp

                    self._firecrawl = FirecrawlApp(api_key=self.api_key)
                except subprocess.CalledProcessError as err:
                    # Chain the installer failure so its exit status is not lost.
                    raise ImportError("Failed to install firecrawl-py package") from err
            else:
                raise ImportError(
                    "`firecrawl-py` package not found, please run `uv add firecrawl-py`"
                )

    def _run(
        self,
        query: str,
    ) -> Any:
        """Run a Firecrawl search for `query` and return the SDK's result.

        Raises:
            RuntimeError: If the Firecrawl client was never initialized.
        """
        if not self._firecrawl:
            raise RuntimeError("FirecrawlApp not properly initialized")

        return self._firecrawl.search(
            query=query,
            params=self.config,
        )
|
||||
|
||||
|
||||
# Rebuild the pydantic model once `firecrawl` is importable -- presumably so the
# "FirecrawlApp" string annotation on `_firecrawl` can be resolved.
try:
    from firecrawl import FirecrawlApp  # type: ignore

    # The marker attribute keeps the rebuild from running more than once.
    if not hasattr(FirecrawlSearchTool, "_model_rebuilt"):
        FirecrawlSearchTool.model_rebuild()
        FirecrawlSearchTool._model_rebuilt = True
except ImportError:
    # When this tool is not used, then exception can be ignored.
    pass
|
||||
50
crewai_tools/tools/generate_crewai_automation_tool/README.md
Normal file
50
crewai_tools/tools/generate_crewai_automation_tool/README.md
Normal file
@@ -0,0 +1,50 @@
|
||||
# GenerateCrewaiAutomationTool
|
||||
|
||||
## Description
|
||||
|
||||
The GenerateCrewaiAutomationTool integrates with CrewAI Studio API to generate complete CrewAI automations from natural language descriptions. It translates high-level requirements into functional CrewAI implementations and returns direct links to Studio projects.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Set your CrewAI Personal Access Token (CrewAI Enterprise > Settings > Account > Personal Access Token):
|
||||
|
||||
```bash
|
||||
export CREWAI_PERSONAL_ACCESS_TOKEN="your_personal_access_token_here"
|
||||
export CREWAI_PLUS_URL="https://app.crewai.com" # optional
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
```python
|
||||
from crewai_tools import GenerateCrewaiAutomationTool
|
||||
from crewai import Agent, Task, Crew
|
||||
|
||||
# Initialize tool
|
||||
tool = GenerateCrewaiAutomationTool()
|
||||
|
||||
# Generate automation
|
||||
result = tool.run(
|
||||
prompt="Generate a CrewAI automation that scrapes websites and stores data in a database",
|
||||
organization_id="org_123" # optional but recommended
|
||||
)
|
||||
|
||||
print(result)
|
||||
# Output: Generated CrewAI Studio project URL: https://studio.crewai.com/project/abc123
|
||||
|
||||
# Use with agent
|
||||
agent = Agent(
|
||||
role="Automation Architect",
|
||||
goal="Generate CrewAI automations",
|
||||
backstory="Expert at creating automated workflows",
|
||||
tools=[tool]
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="Create a lead qualification automation",
|
||||
agent=agent,
|
||||
expected_output="Studio project URL"
|
||||
)
|
||||
|
||||
crew = Crew(agents=[agent], tasks=[task])
|
||||
result = crew.kickoff()
|
||||
```
|
||||
@@ -0,0 +1,70 @@
|
||||
import os
|
||||
from typing import List, Optional, Type
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Arguments accepted by GenerateCrewaiAutomationTool; `_run` validates its
# keyword arguments by instantiating this model.
class GenerateCrewaiAutomationToolSchema(BaseModel):
    # Natural-language request; sent as the "prompt" key of the JSON request body.
    prompt: str = Field(
        description="The prompt to generate the CrewAI automation, e.g. 'Generate a CrewAI automation that will scrape the website and store the data in a database.'"
    )
    # When set, sent as the "X-Crewai-Organization-Id" request header.
    organization_id: Optional[str] = Field(
        default=None,
        description="The identifier for the CrewAI Enterprise organization. If not specified, a default organization will be used.",
    )
|
||||
|
||||
|
||||
class GenerateCrewaiAutomationTool(BaseTool):
    """Generate a CrewAI Studio project from a natural-language prompt.

    Posts the prompt to `<crewai_enterprise_url>/crewai_plus/api/v1/studio`,
    authenticated with a Personal Access Token, and reports the project URL
    found under the "url" key of the JSON response.
    """

    name: str = "Generate CrewAI Automation"
    description: str = (
        "A tool that leverages CrewAI Studio's capabilities to automatically generate complete CrewAI "
        "automations based on natural language descriptions. It translates high-level requirements into "
        "functional CrewAI implementations."
    )
    args_schema: Type[BaseModel] = GenerateCrewaiAutomationToolSchema
    crewai_enterprise_url: str = Field(
        default_factory=lambda: os.getenv("CREWAI_PLUS_URL", "https://app.crewai.com"),
        description="The base URL of CrewAI Enterprise. If not provided, it will be loaded from the environment variable CREWAI_PLUS_URL with default https://app.crewai.com.",
    )
    personal_access_token: Optional[str] = Field(
        default_factory=lambda: os.getenv("CREWAI_PERSONAL_ACCESS_TOKEN"),
        description="The user's Personal Access Token to access CrewAI Enterprise API. If not provided, it will be loaded from the environment variable CREWAI_PERSONAL_ACCESS_TOKEN.",
    )
    env_vars: List[EnvVar] = [
        EnvVar(
            name="CREWAI_PERSONAL_ACCESS_TOKEN",
            description="Personal Access Token for CrewAI Enterprise API",
            required=True,
        ),
        EnvVar(
            name="CREWAI_PLUS_URL",
            description="Base URL for CrewAI Enterprise API",
            required=False,
        ),
    ]

    def _run(self, **kwargs) -> str:
        """Request a Studio project for the given prompt.

        Args:
            **kwargs: Fields of `GenerateCrewaiAutomationToolSchema`
                (`prompt`, optional `organization_id`).

        Returns:
            A sentence containing the value of the response's "url" key.

        Raises:
            ValueError: If no Personal Access Token is configured.
            requests.HTTPError: If the API answers with an error status.
        """
        input_data = GenerateCrewaiAutomationToolSchema(**kwargs)

        # Fail early with a clear message instead of sending "Bearer None".
        if not self.personal_access_token:
            raise ValueError(
                "A Personal Access Token is required. Pass `personal_access_token` or set the "
                "CREWAI_PERSONAL_ACCESS_TOKEN environment variable."
            )

        # Tolerate a trailing slash on the configured base URL.
        base_url = self.crewai_enterprise_url.rstrip("/")
        response = requests.post(
            f"{base_url}/crewai_plus/api/v1/studio",
            headers=self._get_headers(input_data.organization_id),
            json={"prompt": input_data.prompt},
        )

        response.raise_for_status()
        studio_project_url = response.json().get("url")
        return f"Generated CrewAI Studio project URL: {studio_project_url}"

    def _get_headers(self, organization_id: Optional[str] = None) -> dict:
        """Build the request headers; the organization header is added only when an id is given."""
        headers = {
            "Authorization": f"Bearer {self.personal_access_token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        if organization_id:
            headers["X-Crewai-Organization-Id"] = organization_id

        return headers
|
||||
67
crewai_tools/tools/github_search_tool/README.md
Normal file
67
crewai_tools/tools/github_search_tool/README.md
Normal file
@@ -0,0 +1,67 @@
|
||||
# GithubSearchTool
|
||||
|
||||
## Description
|
||||
The GithubSearchTool is a Retrieval Augmented Generation (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.
|
||||
|
||||
## Installation
|
||||
To use the GithubSearchTool, first ensure the crewai_tools package is installed in your Python environment:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
This command installs the necessary package to run the GithubSearchTool along with any other tools included in the crewai_tools package.
|
||||
|
||||
## Example
|
||||
Here’s how you can use the GithubSearchTool to perform semantic searches within a GitHub repository:
|
||||
```python
|
||||
from crewai_tools import GithubSearchTool
|
||||
|
||||
# Initialize the tool for semantic searches within a specific GitHub repository
|
||||
tool = GithubSearchTool(
|
||||
gh_token='...',
|
||||
github_repo='https://github.com/example/repo',
|
||||
content_types=['code', 'issue'] # Options: code, repo, pr, issue
|
||||
)
|
||||
|
||||
# OR
|
||||
|
||||
# Initialize the tool without a fixed GitHub repository, so the agent can search any repository it learns about during its execution
|
||||
tool = GithubSearchTool(
|
||||
gh_token='...',
|
||||
content_types=['code', 'issue'] # Options: code, repo, pr, issue
|
||||
)
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `gh_token` : The GitHub token used to authenticate the search. This is a mandatory field and allows the tool to access the GitHub API for conducting searches.
|
||||
- `github_repo` : The URL of the GitHub repository where the search will be conducted. Optional at construction: when it is omitted, the repository must be supplied as `github_repo` each time the tool is run.
|
||||
- `content_types` : Specifies the types of content to include in your search. You must provide a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is mandatory and allows tailoring the search to specific content types within the GitHub repository.
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = GithubSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
88
crewai_tools/tools/github_search_tool/github_search_tool.py
Normal file
88
crewai_tools/tools/github_search_tool/github_search_tool.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from typing import List, Optional, Type, Any
|
||||
|
||||
try:
|
||||
from embedchain.loaders.github import GithubLoader
|
||||
EMBEDCHAIN_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDCHAIN_AVAILABLE = False
|
||||
|
||||
from pydantic import BaseModel, Field, PrivateAttr
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
# Run-time arguments once a repository has been indexed at construction:
# GithubSearchTool.__init__ switches `args_schema` to this model in that case,
# so only the query remains.
class FixedGithubSearchToolSchema(BaseModel):
    """Input for GithubSearchTool."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the github repo's content",
    )
|
||||
|
||||
|
||||
# Default run-time arguments for GithubSearchTool: the query (inherited) plus
# the repository and content types to search, all three required.
class GithubSearchToolSchema(FixedGithubSearchToolSchema):
    """Input for GithubSearchTool."""

    github_repo: str = Field(..., description="Mandatory github you want to search")
    content_types: List[str] = Field(
        ...,
        description="Mandatory content types you want to be included search, options: [code, repo, pr, issue]",
    )
|
||||
|
||||
|
||||
class GithubSearchTool(RagTool):
    """Semantic search over the content of a GitHub repository.

    The repository can be fixed at construction (`github_repo=...`), in which
    case it is indexed immediately and only a query is needed at run time, or
    supplied on each run.
    """

    name: str = "Search a github repo's content"
    description: str = (
        "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
    )
    summarize: bool = False
    gh_token: str
    args_schema: Type[BaseModel] = GithubSearchToolSchema
    content_types: List[str] = Field(
        default_factory=lambda: ["code", "repo", "pr", "issue"],
        description="Content types you want to be included search, options: [code, repo, pr, issue]",
    )
    # GithubLoader built from `gh_token` in `__init__`; None until then.
    _loader: Any | None = PrivateAttr(default=None)

    def __init__(
        self,
        github_repo: Optional[str] = None,
        content_types: Optional[List[str]] = None,
        **kwargs,
    ):
        """Create the tool and optionally index a repository up front.

        Args:
            github_repo: Repository to index immediately. When given, the tool
                switches to the query-only schema.
            content_types: Content types to search. Stored on the
                `content_types` field; when omitted the field's default is used.
            **kwargs: Remaining model fields (`gh_token` is required).

        Raises:
            ImportError: If embedchain is not installed.
        """
        if not EMBEDCHAIN_AVAILABLE:
            raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")
        # Forward the argument to the model field. Previously it was dropped
        # unless `github_repo` was also given, so later `add()` calls always
        # fell back to the four default content types.
        if content_types:
            kwargs["content_types"] = content_types
        super().__init__(**kwargs)
        self._loader = GithubLoader(config={"token": self.gh_token})

        # A repository alone is enough: `add()` falls back to
        # `self.content_types` when no explicit content types are passed.
        if github_repo:
            self.add(repo=github_repo, content_types=content_types)
            self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
            self.args_schema = FixedGithubSearchToolSchema
            self._generate_description()

    def add(
        self,
        repo: str,
        content_types: Optional[List[str]] = None,
    ) -> None:
        """Index `repo`, restricted to `content_types` (or the tool's own when omitted)."""
        content_types = content_types or self.content_types

        super().add(
            f"repo:{repo} type:{','.join(content_types)}",
            data_type="github",
            loader=self._loader,
        )

    def _run(
        self,
        search_query: str,
        github_repo: Optional[str] = None,
        content_types: Optional[List[str]] = None,
    ) -> str:
        """Answer `search_query`, first indexing `github_repo` when one is supplied."""
        if github_repo:
            self.add(
                repo=github_repo,
                content_types=content_types,
            )
        return super()._run(query=search_query)
|
||||
42
crewai_tools/tools/hyperbrowser_load_tool/README.md
Normal file
42
crewai_tools/tools/hyperbrowser_load_tool/README.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# HyperbrowserLoadTool
|
||||
|
||||
## Description
|
||||
|
||||
[Hyperbrowser](https://hyperbrowser.ai) is a platform for running and scaling headless browsers. It lets you launch and manage browser sessions at scale and provides easy to use solutions for any webscraping needs, such as scraping a single page or crawling an entire site.
|
||||
|
||||
Key Features:
|
||||
- Instant Scalability - Spin up hundreds of browser sessions in seconds without infrastructure headaches
|
||||
- Simple Integration - Works seamlessly with popular tools like Puppeteer and Playwright
|
||||
- Powerful APIs - Easy to use APIs for scraping/crawling any site, and much more
|
||||
- Bypass Anti-Bot Measures - Built-in stealth mode, ad blocking, automatic CAPTCHA solving, and rotating proxies
|
||||
|
||||
For more information about Hyperbrowser, please visit the [Hyperbrowser website](https://hyperbrowser.ai) or if you want to check out the docs, you can visit the [Hyperbrowser docs](https://docs.hyperbrowser.ai).
|
||||
|
||||
## Installation
|
||||
|
||||
- Head to [Hyperbrowser](https://app.hyperbrowser.ai/) to sign up and generate an API key. Once you've done this set the `HYPERBROWSER_API_KEY` environment variable or you can pass it to the `HyperbrowserLoadTool` constructor.
|
||||
- Install the [Hyperbrowser SDK](https://github.com/hyperbrowserai/python-sdk):
|
||||
|
||||
```
|
||||
pip install hyperbrowser 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Utilize the HyperbrowserLoadTool as follows to allow your agent to load websites:
|
||||
|
||||
```python
|
||||
from crewai_tools import HyperbrowserLoadTool
|
||||
|
||||
tool = HyperbrowserLoadTool()
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
`__init__` arguments:
|
||||
- `api_key`: Optional. Specifies Hyperbrowser API key. Defaults to the `HYPERBROWSER_API_KEY` environment variable.
|
||||
|
||||
`run` arguments:
|
||||
- `url`: The base URL to start scraping or crawling from.
|
||||
- `operation`: Optional. Specifies the operation to perform on the website. Either 'scrape' or 'crawl'. Defaults to 'scrape'.
|
||||
- `params`: Optional. Specifies the params for the operation. For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait.
|
||||
@@ -0,0 +1,107 @@
|
||||
import os
|
||||
from typing import Any, Optional, Type, Dict, Literal, Union, List
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Input schema for HyperbrowserLoadTool. `operation` and `params` carry
# defaults so that they are optional, matching the `_run` signature; without
# defaults both fields were required.
class HyperbrowserLoadToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    operation: Literal['scrape', 'crawl'] = Field(
        default='scrape',
        description="Operation to perform on the website. Either 'scrape' or 'crawl'",
    )
    # An empty dict (not None) keeps the default safe to hand straight to `_run`.
    params: Optional[Dict] = Field(
        default_factory=dict,
        description="Optional params for scrape or crawl. For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait",
    )
|
||||
|
||||
class HyperbrowserLoadTool(BaseTool):
|
||||
"""HyperbrowserLoadTool.
|
||||
|
||||
Scrape or crawl web pages and load the contents with optional parameters for configuring content extraction.
|
||||
Requires the `hyperbrowser` package.
|
||||
Get your API Key from https://app.hyperbrowser.ai/
|
||||
|
||||
Args:
|
||||
api_key: The Hyperbrowser API key, can be set as an environment variable `HYPERBROWSER_API_KEY` or passed directly
|
||||
"""
|
||||
name: str = "Hyperbrowser web load tool"
|
||||
description: str = "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html"
|
||||
args_schema: Type[BaseModel] = HyperbrowserLoadToolSchema
|
||||
api_key: Optional[str] = None
|
||||
hyperbrowser: Optional[Any] = None
|
||||
package_dependencies: List[str] = ["hyperbrowser"]
|
||||
env_vars: List[EnvVar] = [
|
||||
EnvVar(name="HYPERBROWSER_API_KEY", description="API key for Hyperbrowser services", required=False),
|
||||
]
|
||||
|
||||
def __init__(self, api_key: Optional[str] = None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.api_key = api_key or os.getenv('HYPERBROWSER_API_KEY')
|
||||
if not api_key:
|
||||
raise ValueError(
|
||||
"`api_key` is required, please set the `HYPERBROWSER_API_KEY` environment variable or pass it directly"
|
||||
)
|
||||
|
||||
try:
|
||||
from hyperbrowser import Hyperbrowser
|
||||
except ImportError:
|
||||
raise ImportError("`hyperbrowser` package not found, please run `pip install hyperbrowser`")
|
||||
|
||||
if not self.api_key:
|
||||
raise ValueError("HYPERBROWSER_API_KEY is not set. Please provide it either via the constructor with the `api_key` argument or by setting the HYPERBROWSER_API_KEY environment variable.")
|
||||
|
||||
self.hyperbrowser = Hyperbrowser(api_key=self.api_key)
|
||||
|
||||
def _prepare_params(self, params: Dict) -> Dict:
|
||||
"""Prepare session and scrape options parameters."""
|
||||
try:
|
||||
from hyperbrowser.models.session import CreateSessionParams
|
||||
from hyperbrowser.models.scrape import ScrapeOptions
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`hyperbrowser` package not found, please run `pip install hyperbrowser`"
|
||||
)
|
||||
|
||||
if "scrape_options" in params:
|
||||
if "formats" in params["scrape_options"]:
|
||||
formats = params["scrape_options"]["formats"]
|
||||
if not all(fmt in ["markdown", "html"] for fmt in formats):
|
||||
raise ValueError("formats can only contain 'markdown' or 'html'")
|
||||
|
||||
if "session_options" in params:
|
||||
params["session_options"] = CreateSessionParams(**params["session_options"])
|
||||
if "scrape_options" in params:
|
||||
params["scrape_options"] = ScrapeOptions(**params["scrape_options"])
|
||||
return params
|
||||
|
||||
def _extract_content(self, data: Union[Any, None]):
|
||||
"""Extract content from response data."""
|
||||
content = ""
|
||||
if data:
|
||||
content = data.markdown or data.html or ""
|
||||
return content
|
||||
|
||||
def _run(self, url: str, operation: Literal['scrape', 'crawl'] = 'scrape', params: Optional[Dict] = None):
    """Scrape one page or crawl a site and return the extracted text.

    Args:
        url: Address to scrape, or the starting point of the crawl.
        operation: ``'scrape'`` fetches a single page; any other value takes
            the crawl path.
        params: Optional extra job parameters. ``session_options`` and
            ``scrape_options`` entries are validated and converted by
            ``_prepare_params``. The mapping is not modified.

    Returns:
        For a scrape, the page content (markdown preferred, else HTML).
        For a crawl, one delimited section per page that yielded content,
        concatenated into a single string (empty if no page had content).

    Raises:
        ImportError: If the ``hyperbrowser`` package is not installed.
    """
    try:
        from hyperbrowser.models.scrape import StartScrapeJobParams
        from hyperbrowser.models.crawl import StartCrawlJobParams
    except ImportError:
        raise ImportError(
            "`hyperbrowser` package not found, please run `pip install hyperbrowser`"
        )

    # Hand the helper a private copy: it may replace entries with model
    # objects, and neither the caller's dict nor a shared default should be
    # altered between calls.
    params = self._prepare_params(dict(params) if params else {})

    if operation == 'scrape':
        scrape_params = StartScrapeJobParams(url=url, **params)
        scrape_resp = self.hyperbrowser.scrape.start_and_wait(scrape_params)
        return self._extract_content(scrape_resp.data)

    crawl_params = StartCrawlJobParams(url=url, **params)
    crawl_resp = self.hyperbrowser.crawl.start_and_wait(crawl_params)
    sections = []
    for page in crawl_resp.data or []:
        page_content = self._extract_content(page)
        if page_content:
            sections.append(
                f"\n{'-'*50}\nUrl: {page.url}\nContent:\n{page_content}\n"
            )
    return "".join(sections)
|
||||
159
crewai_tools/tools/invoke_crewai_automation_tool/README.md
Normal file
159
crewai_tools/tools/invoke_crewai_automation_tool/README.md
Normal file
@@ -0,0 +1,159 @@
|
||||
# InvokeCrewAIAutomationTool
|
||||
|
||||
## Description
|
||||
|
||||
The InvokeCrewAIAutomationTool provides CrewAI Platform API integration with external crew services. This tool allows you to invoke and interact with CrewAI Platform automations from within your CrewAI agents, enabling seamless integration between different crew workflows.
|
||||
|
||||
## Features
|
||||
|
||||
- **Dynamic Input Schema**: Configure custom input parameters for different crew automations
|
||||
- **Automatic Polling**: Automatically polls for task completion with configurable timeout
|
||||
- **Bearer Token Authentication**: Secure API authentication using bearer tokens
|
||||
- **Comprehensive Error Handling**: Robust error handling for API failures and timeouts
|
||||
- **Flexible Configuration**: Support for both simple and complex crew automation workflows
|
||||
|
||||
## Installation
|
||||
|
||||
Install the required dependencies:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from crewai_tools import InvokeCrewAIAutomationTool
|
||||
|
||||
# Basic crew automation tool
|
||||
tool = InvokeCrewAIAutomationTool(
|
||||
crew_api_url="https://data-analysis-crew-[...].crewai.com",
|
||||
crew_bearer_token="your_bearer_token_here",
|
||||
crew_name="Data Analysis Crew",
|
||||
crew_description="Analyzes data and generates insights"
|
||||
)
|
||||
|
||||
# Use the tool
|
||||
result = tool.run()
|
||||
```
|
||||
|
||||
### Advanced Usage with Custom Inputs
|
||||
|
||||
```python
|
||||
from crewai_tools import InvokeCrewAIAutomationTool
|
||||
from pydantic import Field
|
||||
|
||||
# Define custom input schema
|
||||
custom_inputs = {
|
||||
"year": Field(..., description="Year to retrieve the report for (integer)"),
|
||||
"region": Field(default="global", description="Geographic region for analysis"),
|
||||
"format": Field(default="summary", description="Report format (summary, detailed, raw)")
|
||||
}
|
||||
|
||||
# Create tool with custom inputs
|
||||
tool = InvokeCrewAIAutomationTool(
|
||||
crew_api_url="https://state-of-ai-report-crew-[...].crewai.com",
|
||||
crew_bearer_token="your_bearer_token_here",
|
||||
crew_name="State of AI Report",
|
||||
crew_description="Retrieves a comprehensive report on state of AI for a given year and region",
|
||||
crew_inputs=custom_inputs,
|
||||
max_polling_time=15 * 60 # 15 minutes timeout
|
||||
)
|
||||
|
||||
# Use with custom parameters
|
||||
result = tool.run(year=2024, region="north-america", format="detailed")
|
||||
```
|
||||
|
||||
### Integration with CrewAI Agents
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew
|
||||
from crewai_tools import InvokeCrewAIAutomationTool
from pydantic import Field
|
||||
|
||||
# Create the automation tool
|
||||
market_research_tool = InvokeCrewAIAutomationTool(
|
||||
crew_api_url="https://market-research-automation-crew-[...].crewai.com",
|
||||
crew_bearer_token="your_bearer_token_here",
|
||||
crew_name="Market Research Automation",
|
||||
crew_description="Conducts comprehensive market research analysis",
|
||||
crew_inputs={
|
||||
"year": Field(..., description="Year to use for the market research"),
|
||||
}
|
||||
)
|
||||
|
||||
# Create an agent with the tool
|
||||
research_agent = Agent(
|
||||
role="Research Coordinator",
|
||||
goal="Coordinate and execute market research tasks",
|
||||
backstory="You are an expert at coordinating research tasks and leveraging automation tools.",
|
||||
tools=[market_research_tool],
|
||||
verbose=True
|
||||
)
|
||||
|
||||
# Create and execute a task
|
||||
research_task = Task(
|
||||
description="Conduct market research on AI tools market for 2024",
|
||||
agent=research_agent,
|
||||
expected_output="Comprehensive market research report"
|
||||
)
|
||||
|
||||
crew = Crew(
|
||||
agents=[research_agent],
|
||||
tasks=[research_task]
|
||||
)
|
||||
|
||||
result = crew.kickoff()
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
### Required Parameters
|
||||
|
||||
- `crew_api_url` (str): Base URL of the CrewAI Platform automation API
|
||||
- `crew_bearer_token` (str): Bearer token for API authentication
|
||||
- `crew_name` (str): Name of the crew automation
|
||||
- `crew_description` (str): Description of what the crew automation does
|
||||
|
||||
### Optional Parameters
|
||||
|
||||
- `max_polling_time` (int): Maximum time in seconds to wait for task completion (default: 600 seconds = 10 minutes)
|
||||
- `crew_inputs` (dict): Dictionary defining custom input schema fields using Pydantic Field objects
|
||||
|
||||
## Custom Input Schema
|
||||
|
||||
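When defining `crew_inputs`, use Pydantic Field objects to specify the input parameters. These have to be compatible with the crew automation you are invoking. A bare `Field` is treated as a `str` input; to declare any other type, pass a `(type, Field(...))` tuple instead: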
|
||||
|
||||
```python
|
||||
from pydantic import Field
|
||||
|
||||
crew_inputs = {
|
||||
"required_param": Field(..., description="This parameter is required"),
|
||||
"optional_param": Field(default="default_value", description="This parameter is optional"),
|
||||
"typed_param": (int, Field(..., description="Integer parameter", ge=1, le=100))  # (type, Field) tuple, with validation
|
||||
}
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
The tool provides comprehensive error handling for common scenarios:
|
||||
|
||||
- **API Connection Errors**: Network connectivity issues
|
||||
- **Authentication Errors**: Invalid or expired bearer tokens
|
||||
- **Timeout Errors**: Tasks that exceed the maximum polling time
|
||||
- **Task Failures**: Crew automations that fail during execution
|
||||
|
||||
## API Endpoints
|
||||
|
||||
The tool interacts with two main API endpoints:
|
||||
|
||||
- `POST {crew_api_url}/kickoff`: Starts a new crew automation task
|
||||
- `GET {crew_api_url}/status/{crew_id}`: Checks the status of a running task
|
||||
|
||||
## Notes
|
||||
|
||||
- The tool automatically polls the status endpoint every second until completion or timeout
|
||||
- Successful tasks return the result directly, while failed tasks return error information
|
||||
- The bearer token should be kept secure and not hardcoded in production environments
|
||||
- Consider using environment variables for sensitive configuration like bearer tokens
|
||||
@@ -0,0 +1,176 @@
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field, create_model
|
||||
from typing import Any, Type
|
||||
import requests
|
||||
import time
|
||||
|
||||
class InvokeCrewAIAutomationInput(BaseModel):
    """Input schema for InvokeCrewAIAutomationTool."""

    # Used as the tool's argument schema when no custom `crew_inputs` are given.
    prompt: str = Field(
        default=...,
        description="The prompt or query to send to the crew",
    )
|
||||
|
||||
class InvokeCrewAIAutomationTool(BaseTool):
    """
    A CrewAI tool for invoking external crew/flows APIs.

    This tool provides CrewAI Platform API integration with external crew services, supporting:
    - Dynamic input schema configuration
    - Automatic polling for task completion
    - Bearer token authentication
    - Comprehensive error handling

    Example:
        Basic usage:
        >>> tool = InvokeCrewAIAutomationTool(
        ...     crew_api_url="https://api.example.com",
        ...     crew_bearer_token="your_token",
        ...     crew_name="My Crew",
        ...     crew_description="Description of what the crew does"
        ... )

        With custom inputs:
        >>> custom_inputs = {
        ...     "param1": Field(..., description="Description of param1"),
        ...     "param2": Field(default="default_value", description="Description of param2")
        ... }
        >>> tool = InvokeCrewAIAutomationTool(
        ...     crew_api_url="https://api.example.com",
        ...     crew_bearer_token="your_token",
        ...     crew_name="My Crew",
        ...     crew_description="Description of what the crew does",
        ...     crew_inputs=custom_inputs
        ... )

    Example:
        >>> tools=[
        ...     InvokeCrewAIAutomationTool(
        ...         crew_api_url="https://canary-crew-[...].crewai.com",
        ...         crew_bearer_token="[Your token: abcdef012345]",
        ...         crew_name="State of AI Report",
        ...         crew_description="Retrieves a report on state of AI for a given year.",
        ...         crew_inputs={
        ...             "year": Field(..., description="Year to retrieve the report for (integer)")
        ...         }
        ...     )
        ... ]
    """

    name: str = "invoke_amp_automation"
    description: str = "Invokes an CrewAI Platform Automation using API"
    args_schema: Type[BaseModel] = InvokeCrewAIAutomationInput

    crew_api_url: str
    crew_bearer_token: str
    max_polling_time: int = 10 * 60  # 10 minutes

    def __init__(
        self,
        crew_api_url: str,
        crew_bearer_token: str,
        crew_name: str,
        crew_description: str,
        max_polling_time: int = 10 * 60,
        crew_inputs: dict[str, Any] = None,
    ):
        """
        Initialize the InvokeCrewAIAutomationTool.

        Args:
            crew_api_url: Base URL of the crew API service
            crew_bearer_token: Bearer token for API authentication
            crew_name: Name of the crew to invoke (becomes the tool name)
            crew_description: Description of the crew to invoke (becomes the tool description)
            max_polling_time: Maximum time in seconds to wait for task completion (default: 600 seconds = 10 minutes)
            crew_inputs: Optional dictionary defining custom input schema fields.
                Each value is either a ``(type, Field)`` tuple, used as given, or a
                bare Field object, which is declared as a ``str`` input.
        """
        if crew_inputs:
            # Build a dynamic schema made only of the custom fields; the default
            # `prompt` field is NOT included in this case.
            fields = {
                field_name: field_def if isinstance(field_def, tuple) else (str, field_def)
                for field_name, field_def in crew_inputs.items()
            }
            args_schema = create_model('DynamicInvokeCrewAIAutomationInput', **fields)
        else:
            args_schema = InvokeCrewAIAutomationInput

        # Initialize the parent class with proper field values
        super().__init__(
            name=crew_name,
            description=crew_description,
            args_schema=args_schema,
            crew_api_url=crew_api_url,
            crew_bearer_token=crew_bearer_token,
            max_polling_time=max_polling_time,
        )

    def _request_headers(self) -> dict[str, str]:
        """Return the headers sent with every request to the crew API."""
        return {
            "Authorization": f"Bearer {self.crew_bearer_token}",
            "Content-Type": "application/json",
        }

    def _kickoff_crew(self, inputs: dict[str, Any]) -> dict[str, Any]:
        """Start a new crew task

        Args:
            inputs: Dictionary containing the query and other input parameters

        Returns:
            Dictionary containing the crew task response. The response will contain the crew id which needs to be returned to check the status of the crew.
        """
        response = requests.post(
            f"{self.crew_api_url}/kickoff",
            headers=self._request_headers(),
            json={"inputs": inputs},
            # Without a timeout an unresponsive server would block forever.
            timeout=30,
        )
        return response.json()

    def _get_crew_status(self, crew_id: str) -> dict[str, Any]:
        """Get the status of a crew task

        Args:
            crew_id: The ID of the crew task to check

        Returns:
            Dictionary containing the crew task status
        """
        response = requests.get(
            f"{self.crew_api_url}/status/{crew_id}",
            headers=self._request_headers(),
            # Bound each poll so a stalled request cannot defeat max_polling_time.
            timeout=30,
        )
        return response.json()

    def _run(self, **kwargs) -> str:
        """Kick off the crew and poll until it finishes or time runs out.

        Args:
            **kwargs: Input values forwarded to the crew as its ``inputs``.

        Returns:
            The crew's ``result`` on success, otherwise a string starting
            with ``"Error:"`` describing the kickoff failure, task failure,
            status-check failure or timeout.
        """
        # Start the crew
        response = self._kickoff_crew(inputs=kwargs)

        kickoff_id = response.get("kickoff_id")
        if kickoff_id is None:
            return f"Error: Failed to kickoff crew. Response: {response}"

        # Poll roughly once per second against a wall-clock deadline, so slow
        # status requests count towards max_polling_time.
        deadline = time.monotonic() + self.max_polling_time
        attempts = 0
        last_error = None
        while time.monotonic() < deadline:
            attempts += 1
            try:
                status_response = self._get_crew_status(crew_id=kickoff_id)
                # `or ""` guards against a JSON null state.
                state = str(status_response.get("state") or "").lower()
                if state == "success":
                    return status_response.get("result", "No result returned")
                if state == "failed":
                    return f"Error: Crew task failed. Response: {status_response}"
                last_error = None
            except Exception as e:
                # Best-effort polling: remember the failure and retry; it is
                # reported only if it is still failing when time runs out.
                last_error = e

            time.sleep(1)

        if last_error is not None:
            return f"Error: Failed to get crew status after {attempts} attempts. Last error: {last_error}"
        return f"Error: Crew did not complete within {self.max_polling_time} seconds"
|
||||
38
crewai_tools/tools/jina_scrape_website_tool/README.md
Normal file
38
crewai_tools/tools/jina_scrape_website_tool/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# JinaScrapeWebsiteTool
|
||||
|
||||
## Description
|
||||
A tool designed to extract and read the content of a specified website by using Jina.ai reader. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
```python
|
||||
from crewai_tools import JinaScrapeWebsiteTool
|
||||
|
||||
# To enable scraping any website it finds during its execution
|
||||
tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY')
|
||||
|
||||
# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
|
||||
tool = JinaScrapeWebsiteTool(website_url='https://www.example.com')
|
||||
|
||||
# With custom headers
|
||||
tool = JinaScrapeWebsiteTool(
|
||||
website_url='https://www.example.com',
|
||||
custom_headers={'X-Target-Selector': 'body, .class, #id'}
|
||||
)
|
||||
```
|
||||
|
||||
## Authentication
|
||||
The tool uses Jina.ai's reader service. While it can work without an API key, Jina.ai may apply rate limiting or blocking to unauthenticated requests. For production use, it's recommended to provide an API key.
|
||||
|
||||
## Arguments
|
||||
- `website_url`: URL of the website to read. It must be provided either when the tool is initialized or when it is run; this is the primary input, specifying which website's content should be scraped.
|
||||
- `api_key`: Optional Jina.ai API key for authenticated access to the reader service.
|
||||
- `custom_headers`: Optional dictionary of HTTP headers to use when making requests.
|
||||
|
||||
## Note
|
||||
This tool is an alternative to the standard `ScrapeWebsiteTool` that specifically uses Jina.ai's reader service for enhanced content extraction. Choose this tool when you need more sophisticated content parsing capabilities.
|
||||
@@ -0,0 +1,52 @@
|
||||
from typing import Optional, Type
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class JinaScrapeWebsiteToolInput(BaseModel):
    """Input schema for JinaScrapeWebsiteTool."""

    # The only run-time argument: the page to fetch through the reader.
    website_url: str = Field(
        default=...,
        description="Mandatory website url to read the file",
    )
|
||||
|
||||
|
||||
class JinaScrapeWebsiteTool(BaseTool):
    """Fetch a web page through the ``https://r.jina.ai/`` reader endpoint."""

    name: str = "JinaScrapeWebsiteTool"
    description: str = "A tool that can be used to read a website content using Jina.ai reader and return markdown content."
    args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput
    website_url: Optional[str] = None
    api_key: Optional[str] = None
    headers: dict = {}

    def __init__(
        self,
        website_url: Optional[str] = None,
        api_key: Optional[str] = None,
        custom_headers: Optional[dict] = None,
        **kwargs,
    ):
        """Configure the tool.

        Args:
            website_url: Optional URL to pin the tool to; when given, the
                tool description is rewritten to mention it.
            api_key: Optional API key, sent as a ``Bearer`` token in the
                ``Authorization`` header.
            custom_headers: Optional HTTP headers to send with each request.
                The mapping is copied, never modified.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)
        if website_url is not None:
            self.website_url = website_url
            self.description = f"A tool that can be used to read {website_url}'s content and return markdown content."
            self._generate_description()

        # Build a private header dict so that adding the Authorization header
        # below never writes into the caller's `custom_headers` mapping.
        headers = dict(self.headers if custom_headers is None else custom_headers)

        if api_key is not None:
            self.api_key = api_key
            headers["Authorization"] = f"Bearer {api_key}"

        self.headers = headers

    def _run(self, website_url: Optional[str] = None) -> str:
        """Return the reader's response body for the requested page.

        Args:
            website_url: URL to read; falls back to the URL given at
                initialization.

        Raises:
            ValueError: If no URL is available from either source.
            requests.HTTPError: If the reader answers with an error status
                (raised by ``raise_for_status``).
        """
        url = website_url or self.website_url
        if not url:
            raise ValueError(
                "Website URL must be provided either during initialization or execution"
            )

        response = requests.get(
            f"https://r.jina.ai/{url}", headers=self.headers, timeout=15
        )
        response.raise_for_status()
        return response.text
|
||||
55
crewai_tools/tools/json_search_tool/README.md
Normal file
55
crewai_tools/tools/json_search_tool/README.md
Normal file
@@ -0,0 +1,55 @@
|
||||
# JSONSearchTool
|
||||
|
||||
## Description
|
||||
This tool is used to perform a RAG search within a JSON file's content. It allows users to initiate a search with a specific JSON path, focusing the search operation within that particular JSON file. If the path is provided at initialization, the tool restricts its search scope to the specified JSON file, thereby enhancing the precision of search results.
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package by executing the following command in your terminal:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
Below are examples demonstrating how to use the JSONSearchTool for searching within JSON files. You can either search any JSON content or restrict the search to a specific JSON file.
|
||||
|
||||
```python
|
||||
from crewai_tools import JSONSearchTool
|
||||
|
||||
# Example 1: Initialize the tool for a general search across any JSON content. This is useful when the path is known or can be discovered during execution.
|
||||
tool = JSONSearchTool()
|
||||
|
||||
# Example 2: Initialize the tool with a specific JSON path, limiting the search to a particular JSON file.
|
||||
tool = JSONSearchTool(json_path='./path/to/your/file.json')
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `json_path` (str): An optional argument that defines the path to the JSON file to be searched. This parameter is only necessary if the tool is initialized without a specific JSON path. Providing this argument restricts the search to the specified JSON file.
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = JSONSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=true,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
47
crewai_tools/tools/json_search_tool/json_search_tool.py
Normal file
47
crewai_tools/tools/json_search_tool/json_search_tool.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from typing import Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
class FixedJSONSearchToolSchema(BaseModel):
    """Input for JSONSearchTool."""

    # Schema used once the tool has been bound to a JSON file at construction.
    search_query: str = Field(
        default=...,
        description="Mandatory search query you want to use to search the JSON's content",
    )
|
||||
|
||||
|
||||
class JSONSearchToolSchema(FixedJSONSearchToolSchema):
    """Input for JSONSearchTool."""

    # Extends the fixed schema with the file to search, for unbound tools.
    json_path: str = Field(
        default=...,
        description="File path or URL of a JSON file to be searched",
    )
|
||||
|
||||
|
||||
class JSONSearchTool(RagTool):
    """RAG tool that runs a search query against JSON content."""

    name: str = "Search a JSON's content"
    description: str = (
        "A tool that can be used to semantic search a query from a JSON's content."
    )
    args_schema: Type[BaseModel] = JSONSearchToolSchema

    def __init__(self, json_path: Optional[str] = None, **kwargs):
        """Optionally bind the tool to one JSON source.

        When ``json_path`` is given it is added to the tool, the description
        is rewritten to mention it, and the argument schema is switched to
        the query-only variant.
        """
        super().__init__(**kwargs)
        if json_path is None:
            return

        self.add(json_path)
        self.description = f"A tool that can be used to semantic search a query the {json_path} JSON's content."
        self.args_schema = FixedJSONSearchToolSchema
        self._generate_description()

    def _run(self, search_query: str, json_path: Optional[str] = None) -> str:
        """Add ``json_path`` (if given) and delegate the query to the parent."""
        if json_path is not None:
            self.add(json_path)
        answer = super()._run(query=search_query)
        return answer
|
||||
98
crewai_tools/tools/linkup/README.md
Normal file
98
crewai_tools/tools/linkup/README.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# Linkup Search Tool
|
||||
|
||||
## Description
|
||||
|
||||
The `LinkupSearchTool` is a tool designed for integration with the CrewAI framework. It provides the ability to query the Linkup API for contextual information and retrieve structured results. This tool is ideal for enriching workflows with up-to-date and reliable information from Linkup.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- Perform API queries to the Linkup platform using customizable parameters (`query`, `depth`, `output_type`).
|
||||
- Gracefully handles API errors and provides structured feedback.
|
||||
- Returns well-structured results for seamless integration into CrewAI processes.
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Linkup API Key
|
||||
|
||||
### Steps
|
||||
|
||||
1. ```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
2. Create a `.env` file in your project root and add your Linkup API Key:
|
||||
```plaintext
|
||||
LINKUP_API_KEY=your_linkup_api_key
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Example
|
||||
|
||||
Here is how to use the `LinkupSearchTool` in a CrewAI project:
|
||||
|
||||
1. **Import and Initialize**:
|
||||
```python
|
||||
from crewai_tools.tools.linkup.linkup_search_tool import LinkupSearchTool
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
linkup_tool = LinkupSearchTool(api_key=os.getenv("LINKUP_API_KEY"))
|
||||
```
|
||||
|
||||
2. **Set Up an Agent and Task**:
|
||||
```python
|
||||
from crewai import Agent, Task, Crew
|
||||
|
||||
# Define the agent
|
||||
research_agent = Agent(
|
||||
role="Information Researcher",
|
||||
goal="Fetch relevant results from Linkup.",
|
||||
backstory="An expert in online information retrieval...",
|
||||
tools=[linkup_tool],
|
||||
verbose=True
|
||||
)
|
||||
|
||||
# Define the task
|
||||
search_task = Task(
|
||||
expected_output="A detailed list of Nobel Prize-winning women in physics with their achievements.",
|
||||
description="Search for women who have won the Nobel Prize in Physics.",
|
||||
agent=research_agent
|
||||
)
|
||||
|
||||
# Create and run the crew
|
||||
crew = Crew(
|
||||
agents=[research_agent],
|
||||
tasks=[search_task]
|
||||
)
|
||||
|
||||
result = crew.kickoff()
|
||||
print(result)
|
||||
```
|
||||
|
||||
### Advanced Configuration
|
||||
|
||||
You can customize the parameters for the `LinkupSearchTool`:
|
||||
|
||||
- `query`: The search term or phrase.
|
||||
- `depth`: The search depth (`"standard"` by default).
|
||||
- `output_type`: The type of output (`"searchResults"` by default).
|
||||
|
||||
Example:
|
||||
```python
|
||||
response = linkup_tool._run(
|
||||
query="Women Nobel Prize Physics",
|
||||
depth="standard",
|
||||
output_type="searchResults"
|
||||
)
|
||||
```
|
||||
BIN
crewai_tools/tools/linkup/assets/icon.png
Normal file
BIN
crewai_tools/tools/linkup/assets/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 32 KiB |
78
crewai_tools/tools/linkup/linkup_search_tool.py
Normal file
78
crewai_tools/tools/linkup/linkup_search_tool.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import os
|
||||
from typing import Any, List
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
|
||||
try:
|
||||
from linkup import LinkupClient
|
||||
|
||||
LINKUP_AVAILABLE = True
|
||||
except ImportError:
|
||||
LINKUP_AVAILABLE = False
|
||||
LinkupClient = Any # type placeholder when package is not available
|
||||
|
||||
from pydantic import PrivateAttr
|
||||
|
||||
|
||||
class LinkupSearchTool(BaseTool):
    """Tool that queries the Linkup API and returns the search results."""

    name: str = "Linkup Search Tool"
    description: str = (
        "Performs an API call to Linkup to retrieve contextual information."
    )
    _client: LinkupClient = PrivateAttr()  # type: ignore
    package_dependencies: List[str] = ["linkup-sdk"]
    env_vars: List[EnvVar] = [
        EnvVar(name="LINKUP_API_KEY", description="API key for Linkup", required=True),
    ]

    def __init__(self, api_key: str | None = None, **kwargs):
        """
        Initialize the tool with an API key.

        :param api_key: Linkup API key; falls back to the ``LINKUP_API_KEY``
            environment variable when omitted.
        :param kwargs: Forwarded unchanged to the parent initializer.
        :raises ImportError: If ``linkup-sdk`` is missing and the user
            declines the interactive installation prompt.
        """
        super().__init__(**kwargs)
        try:
            from linkup import LinkupClient
        except ImportError:
            import click

            # Offer to install the missing SDK interactively before giving up.
            if click.confirm(
                "You are missing the 'linkup-sdk' package. Would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "linkup-sdk"], check=True)
                from linkup import LinkupClient

            else:
                raise ImportError(
                    "The 'linkup-sdk' package is required to use the LinkupSearchTool. "
                    "Please install it with: uv add linkup-sdk"
                )
        self._client = LinkupClient(api_key=api_key or os.getenv("LINKUP_API_KEY"))

    def _run(
        self, query: str, depth: str = "standard", output_type: str = "searchResults"
    ) -> dict:
        """
        Executes a search using the Linkup API.

        :param query: The query to search for.
        :param depth: Search depth (default is "standard").
        :param output_type: Desired result type (default is "searchResults").
        :return: A dictionary containing the results or an error message.
        """
        try:
            response = self._client.search(
                query=query, depth=depth, output_type=output_type
            )
            results = [
                {"name": result.name, "url": result.url, "content": result.content}
                for result in response.results
            ]
            return {"success": True, "results": results}
        except Exception as e:
            # Deliberate best-effort: any failure is reported in the returned
            # dict instead of being raised.
            return {"success": False, "error": str(e)}
|
||||
53
crewai_tools/tools/llamaindex_tool/README.md
Normal file
53
crewai_tools/tools/llamaindex_tool/README.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# LlamaIndexTool Documentation
|
||||
|
||||
## Description
|
||||
This tool is a general wrapper around LlamaIndex tools and query engines. It lets you plug LlamaIndex resources,
such as RAG or agentic pipelines, into CrewAI agents as tools.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
The following example demonstrates how to initialize the tool and execute a search with a given query:
|
||||
|
||||
```python
|
||||
from crewai_tools import LlamaIndexTool
|
||||
|
||||
# Initialize the tool from a LlamaIndex Tool
|
||||
|
||||
## Example 1: Initialize from FunctionTool
|
||||
from llama_index.core.tools import FunctionTool
|
||||
|
||||
your_python_function = lambda ...: ...
|
||||
og_tool = FunctionTool.from_defaults(your_python_function, name="<name>", description='<description>')
|
||||
tool = LlamaIndexTool.from_tool(og_tool)
|
||||
|
||||
## Example 2: Initialize from LlamaHub Tools
|
||||
from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
|
||||
wolfram_spec = WolframAlphaToolSpec(app_id="<app_id>")
|
||||
wolfram_tools = wolfram_spec.to_tool_list()
|
||||
tools = [LlamaIndexTool.from_tool(t) for t in wolfram_tools]
|
||||
|
||||
|
||||
# Initialize Tool from a LlamaIndex Query Engine
|
||||
|
||||
## NOTE: LlamaIndex has a lot of query engines, define whatever query engine you want
|
||||
query_engine = index.as_query_engine()
|
||||
query_tool = LlamaIndexTool.from_query_engine(
|
||||
query_engine,
|
||||
name="Uber 2019 10K Query Tool",
|
||||
description="Use this tool to lookup the 2019 Uber 10K Annual Report"
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
## Steps to Get Started
|
||||
To effectively use the `LlamaIndexTool`, follow these steps:
|
||||
|
||||
1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
|
||||
2. **Install and use LlamaIndex**: Follow the LlamaIndex documentation (https://docs.llamaindex.ai/) to set up a RAG/agent pipeline.
|
||||
|
||||
|
||||
82
crewai_tools/tools/llamaindex_tool/llamaindex_tool.py
Normal file
82
crewai_tools/tools/llamaindex_tool/llamaindex_tool.py
Normal file
@@ -0,0 +1,82 @@
|
||||
from typing import Any, Optional, Type, cast
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class LlamaIndexTool(BaseTool):
    """Tool to wrap LlamaIndex tools/query engines."""

    # The wrapped LlamaIndex tool instance. Typed as Any because llama_index
    # is only imported lazily inside the methods below.
    llama_index_tool: Any

    def _run(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Call the wrapped LlamaIndex tool with the given arguments.

        Returns:
            The ``content`` attribute of the tool output when
            ``result_as_answer`` is set, otherwise the tool output itself.
        """
        from llama_index.core.tools import BaseTool as LlamaBaseTool

        wrapped = cast(LlamaBaseTool, self.llama_index_tool)

        if not self.result_as_answer:
            return wrapped(*args, **kwargs)

        output = wrapped(*args, **kwargs)
        return output.content

    @classmethod
    def from_tool(cls, tool: Any, **kwargs: Any) -> "LlamaIndexTool":
        """Create a ``LlamaIndexTool`` from an existing LlamaIndex tool.

        The name, description and argument schema are taken from the tool's
        metadata; extra keyword arguments are forwarded to the constructor.

        Raises:
            ValueError: If ``tool`` is not a LlamaIndex ``BaseTool`` or its
                metadata has no ``fn_schema``.
        """
        from llama_index.core.tools import BaseTool as LlamaBaseTool

        if not isinstance(tool, LlamaBaseTool):
            raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
        llama_tool = cast(LlamaBaseTool, tool)

        if llama_tool.metadata.fn_schema is None:
            raise ValueError(
                "The LlamaIndex tool does not have an fn_schema specified."
            )
        schema = cast(Type[BaseModel], llama_tool.metadata.fn_schema)

        init_kwargs = dict(
            name=llama_tool.metadata.name,
            description=llama_tool.metadata.description,
            args_schema=schema,
            llama_index_tool=llama_tool,
        )
        return cls(**init_kwargs, **kwargs)

    @classmethod
    def from_query_engine(
        cls,
        query_engine: Any,
        name: Optional[str] = None,
        description: Optional[str] = None,
        return_direct: bool = False,
        **kwargs: Any,
    ) -> "LlamaIndexTool":
        """Create a ``LlamaIndexTool`` from a LlamaIndex query engine.

        The engine is wrapped in a ``QueryEngineTool`` whose argument schema
        is replaced by a single ``query`` string field, then handed to
        ``from_tool``.

        Raises:
            ValueError: If ``query_engine`` is not a ``BaseQueryEngine``.
        """
        from llama_index.core.query_engine import BaseQueryEngine
        from llama_index.core.tools import QueryEngineTool

        if not isinstance(query_engine, BaseQueryEngine):
            raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")

        # NOTE: the default schema exposes an `input` variable, which confuses
        # crewAI, so the argument is exposed as `query` instead.
        class QueryToolSchema(BaseModel):
            """Schema for query tool."""

            query: str = Field(..., description="Search query for the query tool.")

        # NOTE: `resolve_input_errors=True` matters here because the wrapped
        # tool expects `input` while callers will send `query`.
        engine_tool = QueryEngineTool.from_defaults(
            query_engine,
            name=name,
            description=description,
            return_direct=return_direct,
            resolve_input_errors=True,
        )

        # HACK: swap the generated schema for the custom one defined above.
        engine_tool.metadata.fn_schema = QueryToolSchema

        return cls.from_tool(engine_tool, **kwargs)
|
||||
57
crewai_tools/tools/mdx_search_tool/README.md
Normal file
57
crewai_tools/tools/mdx_search_tool/README.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# MDXSearchTool
|
||||
|
||||
## Description
|
||||
The MDX Search Tool, part of the `crewai_tools` package, performs semantic search over the content of MDX files (Markdown documents that can embed JSX). Given a search query and the file path or URL of an MDX document, it searches that document's content for the passages relevant to the query.
|
||||
|
||||
## Installation
|
||||
To utilize the MDX Search Tool, ensure the `crewai_tools` package is installed. If not already present, install it using the following command:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
Configuring and using the MDX Search Tool involves setting up environment variables and using the tool within a crewAI project. Here's a simple example:
|
||||
|
||||
```python
|
||||
from crewai_tools import MDXSearchTool
|
||||
|
||||
# Initialize the tool so the agent can search any MDX content it learns about during its execution
|
||||
tool = MDXSearchTool()
|
||||
|
||||
# OR
|
||||
|
||||
# Initialize the tool with a specific MDX file path for exclusive search within that document
|
||||
tool = MDXSearchTool(mdx='path/to/your/document.mdx')
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- mdx: **Optional** The MDX path for the search. Can be provided at initialization
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = MDXSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=True,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
56
crewai_tools/tools/mdx_search_tool/mdx_search_tool.py
Normal file
56
crewai_tools/tools/mdx_search_tool/mdx_search_tool.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from typing import Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
try:
|
||||
from embedchain.models.data_type import DataType
|
||||
EMBEDCHAIN_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDCHAIN_AVAILABLE = False
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
class FixedMDXSearchToolSchema(BaseModel):
    """Input for MDXSearchTool."""

    # Required: the text to search for in the MDX content.
    search_query: str = Field(
        description="Mandatory search query you want to use to search the MDX's content",
        default=...,
    )
|
||||
|
||||
|
||||
class MDXSearchToolSchema(FixedMDXSearchToolSchema):
    """Input for MDXSearchTool."""

    # Required in addition to the inherited `search_query`: which MDX
    # document to search.
    mdx: str = Field(
        description="File path or URL of a MDX file to be searched",
        default=...,
    )
|
||||
|
||||
|
||||
class MDXSearchTool(RagTool):
    # RAG-based tool that searches MDX content. Without a bound document the
    # caller supplies `mdx` on every run; with one, only `search_query` is used.
    name: str = "Search a MDX's content"
    description: str = (
        "A tool that can be used to semantic search a query from a MDX's content."
    )
    args_schema: Type[BaseModel] = MDXSearchToolSchema

    def __init__(self, mdx: Optional[str] = None, **kwargs):
        """Create the tool, optionally binding it to a single MDX source.

        Args:
            mdx: Optional MDX source. When given it is added immediately, the
                description is specialised to it, and the argument schema is
                narrowed to the query-only schema.
            **kwargs: Forwarded to the parent constructor.
        """
        super().__init__(**kwargs)
        if mdx is None:
            return

        self.add(mdx)
        self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content."
        self.args_schema = FixedMDXSearchToolSchema
        self._generate_description()

    def add(self, mdx: str) -> None:
        """Add an MDX source through the parent ``add`` with the MDX data type.

        Raises:
            ImportError: If embedchain could not be imported.
        """
        if EMBEDCHAIN_AVAILABLE:
            super().add(mdx, data_type=DataType.MDX)
            return
        raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")

    def _run(
        self,
        search_query: str,
        mdx: Optional[str] = None,
    ) -> str:
        """Run the search, first adding ``mdx`` when one is supplied."""
        has_new_source = mdx is not None
        if has_new_source:
            self.add(mdx)
        return super()._run(query=search_query)
|
||||
87
crewai_tools/tools/mongodb_vector_search_tool/README.md
Normal file
87
crewai_tools/tools/mongodb_vector_search_tool/README.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# MongoDBVectorSearchTool
|
||||
|
||||
## Description
|
||||
This tool is designed for running vector searches over documents stored in a MongoDB database. Use it to find documents that are semantically similar to a given query.
|
||||
|
||||
MongoDB can act as a vector database that is used to store and query vector embeddings. You can follow the docs here:
|
||||
https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package with MongoDB support by executing the following command in your terminal:
|
||||
|
||||
```shell
|
||||
pip install crewai-tools[mongodb]
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```
|
||||
uv add crewai-tools --extra mongodb
|
||||
```
|
||||
|
||||
## Example
|
||||
To utilize the MongoDBVectorSearchTool for different use cases, follow these examples:
|
||||
|
||||
```python
|
||||
from crewai_tools import MongoDBVectorSearchTool
|
||||
|
||||
# Create the tool for the database and collection that hold your documents
|
||||
tool = MongoDBVectorSearchTool(
|
||||
database_name='example_database',
|
||||
collection_name='example_collections',
|
||||
connection_string="<your_mongodb_connection_string>",
|
||||
)
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```python
|
||||
from crewai_tools import MongoDBVectorSearchConfig, MongoDBVectorSearchTool
|
||||
|
||||
# Set up a custom embedding model and customize the query parameters.
|
||||
query_config = MongoDBVectorSearchConfig(limit=10, oversampling_factor=2)
|
||||
tool = MongoDBVectorSearchTool(
|
||||
database_name='example_database',
|
||||
collection_name='example_collections',
|
||||
connection_string="<your_mongodb_connection_string>",
|
||||
query_config=query_config,
|
||||
vector_index_name="my_vector_index",
|
||||
embedding_model="text-embedding-3-large",
|
||||
)
|
||||
|
||||
# Adding the tool to an agent
|
||||
rag_agent = Agent(
|
||||
name="rag_agent",
|
||||
role="You are a helpful assistant that can answer questions with the help of the MongoDBVectorSearchTool.",
|
||||
goal="...",
|
||||
backstory="...",
|
||||
llm="gpt-4o-mini",
|
||||
tools=[tool],
|
||||
)
|
||||
```
|
||||
|
||||
Preloading the MongoDB database with documents:
|
||||
|
||||
```python
|
||||
import os

from crewai_tools import MongoDBVectorSearchTool
|
||||
|
||||
# Generate the documents and add them to the MongoDB database
|
||||
test_docs = client.collections.get("example_collections")
|
||||
|
||||
# Create the tool.
|
||||
tool = MongoDBVectorSearchTool(
|
||||
database_name='example_database',
|
||||
collection_name='example_collections',
|
||||
connection_string="<your_mongodb_connection_string>",
|
||||
)
|
||||
|
||||
# Add the text from a set of CrewAI knowledge documents.
|
||||
texts = []
|
||||
for d in os.listdir("knowledge"):
|
||||
with open(os.path.join("knowledge", d), "r") as f:
|
||||
texts.append(f.read())
|
||||
tool.add_texts(texts)
|
||||
|
||||
# Create the vector search index (if it wasn't already created in Atlas).
|
||||
tool.create_vector_search_index(dimensions=tool.dimensions)
|
||||
```
|
||||
11
crewai_tools/tools/mongodb_vector_search_tool/__init__.py
Normal file
11
crewai_tools/tools/mongodb_vector_search_tool/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from .vector_search import (
|
||||
MongoDBToolSchema,
|
||||
MongoDBVectorSearchConfig,
|
||||
MongoDBVectorSearchTool,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"MongoDBVectorSearchConfig",
|
||||
"MongoDBVectorSearchTool",
|
||||
"MongoDBToolSchema",
|
||||
]
|
||||
120
crewai_tools/tools/mongodb_vector_search_tool/utils.py
Normal file
120
crewai_tools/tools/mongodb_vector_search_tool/utils.py
Normal file
@@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from time import monotonic, sleep
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pymongo.collection import Collection
|
||||
|
||||
|
||||
def _vector_search_index_definition(
    dimensions: int,
    path: str,
    similarity: str,
    filters: Optional[List[str]] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    """Build the definition document for a vector search index.

    Args:
        dimensions: Value stored as ``numDimensions`` of the vector field.
        path: Document path of the vector field.
        similarity: Similarity function name stored on the vector field.
        filters: Optional paths, each added as a ``filter`` field.
        **kwargs: Extra top-level keys merged into the definition (a key
            named ``fields`` would replace the generated field list).

    Returns:
        A dict with a ``fields`` list (vector field first, then filters)
        plus any extra keys.
    """
    # https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/
    vector_field: Dict[str, Any] = {
        "numDimensions": dimensions,
        "path": path,
        "similarity": similarity,
        "type": "vector",
    }
    filter_fields = [
        {"type": "filter", "path": filter_path} for filter_path in (filters or [])
    ]
    return {"fields": [vector_field, *filter_fields], **kwargs}
|
||||
|
||||
|
||||
def create_vector_search_index(
    collection: Collection,
    index_name: str,
    dimensions: int,
    path: str,
    similarity: str,
    filters: Optional[List[str]] = None,
    *,
    wait_until_complete: Optional[float] = None,
    **kwargs: Any,
) -> None:
    """Experimental Utility function to create a vector search index

    The collection is created first if it does not exist yet in its database.

    Args:
        collection (Collection): MongoDB Collection
        index_name (str): Name of Index
        dimensions (int): Number of dimensions in embedding
        path (str): field with vector embedding
        similarity (str): The similarity score used for the index
        filters (List[str]): Fields/paths to index to allow filtering in $vectorSearch
        wait_until_complete (Optional[float]): If provided (and non-zero),
            number of seconds to wait until search index is ready.
        kwargs: Extra top-level keys merged into the index definition
            document passed to SearchIndexModel.

    Raises:
        TimeoutError: If ``wait_until_complete`` is set and the index is not
            READY within that many seconds.
    """
    from pymongo.operations import SearchIndexModel

    if collection.name not in collection.database.list_collection_names():
        collection.database.create_collection(collection.name)

    # The return value of create_search_index is not needed here; readiness
    # is polled by index name below.
    collection.create_search_index(
        SearchIndexModel(
            definition=_vector_search_index_definition(
                dimensions=dimensions,
                path=path,
                similarity=similarity,
                filters=filters,
                **kwargs,
            ),
            name=index_name,
            type="vectorSearch",
        )
    )

    if wait_until_complete:
        _wait_for_predicate(
            predicate=lambda: _is_index_ready(collection, index_name),
            err=f"{index_name=} did not complete in {wait_until_complete}!",
            timeout=wait_until_complete,
        )
|
||||
|
||||
|
||||
def _is_index_ready(collection: Collection, index_name: str) -> bool:
    """Report whether a search index with the given name has status READY.

    Args:
        collection (Collection): MongoDB Collection whose search indexes are listed
        index_name (str): Vector Search Index name

    Returns:
        bool : True if a listed index has status "READY", False otherwise
            (including when nothing is listed)
    """
    return any(
        search_index["status"] == "READY"
        for search_index in collection.list_search_indexes(index_name)
    )
|
||||
|
||||
|
||||
def _wait_for_predicate(
    predicate: Callable, err: str, timeout: float = 120, interval: float = 0.5
) -> None:
    """Block until the predicate returns a truthy value or the timeout elapses.

    The predicate is always evaluated at least once, so an already-satisfied
    predicate returns immediately regardless of ``timeout``.

    Args:
        predicate (Callable[[], bool]): Zero-argument function polled until truthy
        err (str): Message of the TimeoutError raised when the wait times out
        timeout (float, optional): Maximum number of seconds to wait. Defaults to 120.
        interval (float, optional): Seconds to sleep between polls. Defaults to 0.5.

    Raises:
        TimeoutError: If the predicate is still falsy once ``timeout`` seconds
            have elapsed.
    """
    deadline = monotonic() + timeout
    while not predicate():
        remaining = deadline - monotonic()
        if remaining <= 0:
            raise TimeoutError(err)
        # Never sleep past the deadline: a large interval must not stretch the
        # total wait beyond the requested timeout.
        sleep(min(interval, remaining))
|
||||
327
crewai_tools/tools/mongodb_vector_search_tool/vector_search.py
Normal file
327
crewai_tools/tools/mongodb_vector_search_tool/vector_search.py
Normal file
@@ -0,0 +1,327 @@
|
||||
import os
|
||||
from importlib.metadata import version
|
||||
from logging import getLogger
|
||||
from typing import Any, Dict, Iterable, List, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from openai import AzureOpenAI, Client
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.mongodb_vector_search_tool.utils import (
|
||||
create_vector_search_index,
|
||||
)
|
||||
|
||||
try:
|
||||
import pymongo # noqa: F403
|
||||
|
||||
MONGODB_AVAILABLE = True
|
||||
except ImportError:
|
||||
MONGODB_AVAILABLE = False
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class MongoDBVectorSearchConfig(BaseModel):
    """Configuration for MongoDB vector search queries."""

    # Used as the `limit` of the $vectorSearch stage.
    limit: Optional[int] = Field(
        description="number of documents to return.",
        default=4,
    )
    # Passed as the `filter` of the $vectorSearch stage when truthy.
    pre_filter: Optional[dict[str, Any]] = Field(
        description="List of MQL match expressions comparing an indexed field",
        default=None,
    )
    # Extra aggregation stages appended after the search stages.
    post_filter_pipeline: Optional[list[dict]] = Field(
        description="Pipeline of MongoDB aggregation stages to filter/process results after $vectorSearch.",
        default=None,
    )
    # numCandidates is computed as limit * oversampling_factor.
    oversampling_factor: int = Field(
        description="Multiple of limit used when generating number of candidates at each step in the HNSW Vector Search",
        default=10,
    )
    # When False the embedding field is projected out of the results.
    include_embeddings: bool = Field(
        description="Whether to include the embedding vector of each result in metadata.",
        default=False,
    )
|
||||
|
||||
|
||||
class MongoDBToolSchema(BaseModel):
    """Input for MongoDBTool."""

    # Required: the search text that gets embedded and matched against the
    # stored document embeddings.
    query: str = Field(
        description="The query to search retrieve relevant information from the MongoDB database. Pass only the query, not the question.",
        default=...,
    )
|
||||
|
||||
|
||||
class MongoDBVectorSearchTool(BaseTool):
    """Tool to perform a vector search on a MongoDB database."""

    name: str = "MongoDBVectorSearchTool"
    description: str = "A tool to perform a vector search on a MongoDB database for relevant information on internal documents."

    args_schema: Type[BaseModel] = MongoDBToolSchema
    query_config: Optional[MongoDBVectorSearchConfig] = Field(
        default=None, description="MongoDB Vector Search query configuration"
    )
    embedding_model: str = Field(
        default="text-embedding-3-large",
        description="Text OpenAI embedding model to use",
    )
    vector_index_name: str = Field(
        default="vector_index", description="Name of the Atlas Search vector index"
    )
    text_key: str = Field(
        default="text",
        description="MongoDB field that will contain the text for each document",
    )
    embedding_key: str = Field(
        default="embedding",
        description="Field that will contain the embedding for each document",
    )
    database_name: str = Field(..., description="The name of the MongoDB database")
    collection_name: str = Field(..., description="The name of the MongoDB collection")
    connection_string: str = Field(
        ...,
        description="The connection string of the MongoDB cluster",
    )
    dimensions: int = Field(
        default=1536,
        description="Number of dimensions in the embedding vector",
    )
    # Environment variables this tool actually reads in __init__: one of the
    # two must be present, so neither is individually required.
    env_vars: List[EnvVar] = [
        EnvVar(
            name="OPENAI_API_KEY",
            description="API key for OpenAI, used to create embeddings (not needed when AZURE_OPENAI_ENDPOINT is set)",
            required=False,
        ),
        EnvVar(
            name="AZURE_OPENAI_ENDPOINT",
            description="Azure OpenAI endpoint; when set, the Azure OpenAI client is used to create embeddings",
            required=False,
        ),
    ]
    package_dependencies: List[str] = ["pymongo"]

    def __init__(self, **kwargs):
        """Validate the fields, then create the OpenAI and MongoDB clients.

        If pymongo is missing the user is asked (via click) whether to install
        it with ``uv add pymongo``.

        Raises:
            ImportError: If pymongo is missing and the user declines to install it.
            ValueError: If neither AZURE_OPENAI_ENDPOINT nor OPENAI_API_KEY is
                set in the environment.
        """
        super().__init__(**kwargs)
        if not MONGODB_AVAILABLE:
            import click

            if click.confirm(
                "You are missing the 'mongodb' crewai tool. Would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "pymongo"], check=True)

            else:
                raise ImportError("You are missing the 'mongodb' crewai tool.")

        # Azure takes precedence when both variables are present.
        if "AZURE_OPENAI_ENDPOINT" in os.environ:
            self._openai_client = AzureOpenAI()
        elif "OPENAI_API_KEY" in os.environ:
            self._openai_client = Client()
        else:
            raise ValueError(
                "OPENAI_API_KEY environment variable is required for MongoDBVectorSearchTool and it is mandatory to use the tool."
            )

        from pymongo import MongoClient
        from pymongo.driver_info import DriverInfo

        self._client = MongoClient(
            self.connection_string,
            driver=DriverInfo(name="CrewAI", version=version("crewai-tools")),
        )
        self._coll = self._client[self.database_name][self.collection_name]

    def create_vector_search_index(
        self,
        *,
        dimensions: int,
        relevance_score_fn: str = "cosine",
        auto_index_timeout: int = 15,
    ) -> None:
        """Convenience function to create a vector search index.

        The index is created on this tool's collection, named
        ``vector_index_name``, over the ``embedding_key`` field.

        Args:
            dimensions: Number of dimensions in embedding.
            relevance_score_fn: The similarity score used for the index
                Currently supported: 'euclidean', 'cosine', and 'dotProduct'
            auto_index_timeout: Timeout in seconds to wait for the created
                index to be ready.
        """
        create_vector_search_index(
            collection=self._coll,
            index_name=self.vector_index_name,
            dimensions=dimensions,
            path=self.embedding_key,
            similarity=relevance_score_fn,
            wait_until_complete=auto_index_timeout,
        )

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        ids: Optional[List[str]] = None,
        batch_size: int = 100,
        **kwargs: Any,
    ) -> List[str]:
        """Embed texts and upsert them into the collection in batches.

        Important notes on ids:
            - If _id or id is a key in the metadatas dicts, one must
                pop them and provide as separate list.
            - They must be unique.
            - If they are not provided, new bson.ObjectIds are generated.
            - Provided ids that are valid ObjectId strings are stored as
                ObjectIds; any other id is stored unchanged.

        Args:
            texts: Iterable of strings to add. It is consumed exactly once, so
                one-shot iterators are supported.
            metadatas: Optional list of metadata dicts, one per text. Their
                keys are merged into the stored documents.
            ids: Optional list of unique ids, one per text. See note on ids.
            batch_size: Number of documents to insert at a time.
                Tuning this may help with performance and sidestep MongoDB limits.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            List of ids reported as upserted by the bulk writes.

        Raises:
            ValueError: If ``metadatas`` or ``ids`` is given with a length
                different from the number of texts.
        """
        from bson import ObjectId

        # Materialise once: the texts are needed both for counting and for
        # batching, and a generator would be exhausted after the first pass.
        text_list = list(texts)
        metadata_list = metadatas or [{} for _ in text_list]
        if len(metadata_list) != len(text_list):
            raise ValueError(
                f"Got {len(metadata_list)} metadatas for {len(text_list)} texts"
            )
        if ids is None:
            id_list = [str(ObjectId()) for _ in text_list]
        elif len(ids) != len(text_list):
            raise ValueError(f"Got {len(ids)} ids for {len(text_list)} texts")
        else:
            id_list = list(ids)

        result_ids: List[str] = []
        texts_batch: List[str] = []
        metadatas_batch: List[Dict[str, Any]] = []
        ids_batch: List[str] = []
        size = 0
        for count, (text, metadata, doc_id) in enumerate(
            zip(text_list, metadata_list, id_list), start=1
        ):
            size += len(text) + len(metadata)
            texts_batch.append(text)
            metadatas_batch.append(metadata)
            ids_batch.append(doc_id)
            # Flush on every batch_size-th text, or early once the accumulated
            # size estimate gets large (presumably to stay under a MongoDB
            # message size limit -- TODO confirm the 47_000_000 threshold).
            if count % batch_size == 0 or size >= 47_000_000:
                result_ids.extend(
                    self._bulk_embed_and_insert_texts(
                        texts_batch, metadatas_batch, ids_batch
                    )
                )
                texts_batch = []
                metadatas_batch = []
                ids_batch = []
                size = 0
        if texts_batch:
            result_ids.extend(
                self._bulk_embed_and_insert_texts(
                    texts_batch, metadatas_batch, ids_batch
                )
            )
        return result_ids

    def _embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per text from the configured OpenAI client."""
        return [
            i.embedding
            for i in self._openai_client.embeddings.create(
                input=texts,
                model=self.embedding_model,
                dimensions=self.dimensions,
            ).data
        ]

    def _bulk_embed_and_insert_texts(
        self,
        texts: List[str],
        metadatas: List[dict],
        ids: List[str],
    ) -> List[str]:
        """Bulk insert single batch of texts, embeddings, and ids.

        Returns:
            The ids reported as upserted by the bulk write, as strings.
        """
        from bson import ObjectId
        from pymongo.operations import ReplaceOne

        if not texts:
            return []
        # Compute embedding vectors
        embeddings = self._embed_texts(texts)
        docs = [
            {
                # Ids that are not valid ObjectId strings are stored as given
                # instead of failing the whole batch.
                "_id": ObjectId(doc_id) if ObjectId.is_valid(doc_id) else doc_id,
                self.text_key: text,
                self.embedding_key: embedding,
                **metadata,
            }
            for doc_id, text, metadata, embedding in zip(
                ids, texts, metadatas, embeddings
            )
        ]
        operations = [ReplaceOne({"_id": doc["_id"]}, doc, upsert=True) for doc in docs]
        # insert the documents in MongoDB Atlas
        result = self._coll.bulk_write(operations)
        upserted = result.upserted_ids or {}
        return [str(_id) for _id in upserted.values()]

    def _run(self, query: str) -> str:
        """Embed the query, run a $vectorSearch aggregation and return the hits.

        Returns:
            The matching documents serialised with ``bson.json_util.dumps``,
            or an empty string if anything fails (the error is logged).
        """
        from bson import json_util

        try:
            query_config = self.query_config or MongoDBVectorSearchConfig()
            # `limit` is Optional; fall back to the config's default of 4 so
            # the numCandidates multiplication below cannot fail on None.
            limit = query_config.limit if query_config.limit is not None else 4
            oversampling_factor = query_config.oversampling_factor
            pre_filter = query_config.pre_filter
            include_embeddings = query_config.include_embeddings
            post_filter_pipeline = query_config.post_filter_pipeline

            # Create the embedding for the query
            query_vector = self._embed_texts([query])[0]

            # Atlas Vector Search, potentially with filter
            stage: Dict[str, Any] = {
                "index": self.vector_index_name,
                "path": self.embedding_key,
                "queryVector": query_vector,
                "numCandidates": limit * oversampling_factor,
                "limit": limit,
            }
            if pre_filter:
                stage["filter"] = pre_filter

            pipeline: List[Dict[str, Any]] = [
                {"$vectorSearch": stage},
                {"$set": {"score": {"$meta": "vectorSearchScore"}}},
            ]

            # Remove embeddings unless requested
            if not include_embeddings:
                pipeline.append({"$project": {self.embedding_key: 0}})

            # Post-processing
            if post_filter_pipeline is not None:
                pipeline.extend(post_filter_pipeline)

            # Execution
            cursor = self._coll.aggregate(pipeline)  # type: ignore[arg-type]
            return json_util.dumps(list(cursor))
        except Exception as e:
            # Best effort: the tool reports failures as an empty result.
            logger.error(f"Error: {e}")
            return ""

    def __del__(self):
        """Cleanup clients on deletion."""
        try:
            if hasattr(self, "_client") and self._client:
                self._client.close()
        except Exception as e:
            logger.error(f"Error: {e}")

        try:
            if hasattr(self, "_openai_client") and self._openai_client:
                self._openai_client.close()
        except Exception as e:
            logger.error(f"Error: {e}")
|
||||
53
crewai_tools/tools/multion_tool/README.md
Normal file
53
crewai_tools/tools/multion_tool/README.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# MultiOnTool Documentation
|
||||
|
||||
## Description
|
||||
The MultiOnTool, integrated within the crewai_tools package, empowers CrewAI agents with the capability to navigate and interact with the web through natural language instructions. Leveraging the Multion API, this tool facilitates seamless web browsing, making it an essential asset for projects requiring dynamic web data interaction.
|
||||
|
||||
## Installation
|
||||
Ensure the `crewai[tools]` package is installed in your environment to use the MultiOnTool. If it's not already installed, you can add it using the command below:
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
The following example demonstrates how to initialize the tool and execute a search with a given query:
|
||||
|
||||
```python
|
||||
from crewai import Agent, Task, Crew
|
||||
from crewai_tools import MultiOnTool
|
||||
|
||||
# Initialize the tool from a MultiOn Tool
|
||||
multion_tool = MultiOnTool(api_key= "YOUR_MULTION_API_KEY", local=False)
|
||||
|
||||
Browser = Agent(
|
||||
role="Browser Agent",
|
||||
goal="control web browsers using natural language ",
|
||||
backstory="An expert browsing agent.",
|
||||
tools=[multion_tool],
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
# example task to search and summarize news
|
||||
browse = Task(
|
||||
description="Summarize the top 3 trending AI News headlines",
|
||||
expected_output="A summary of the top 3 trending AI News headlines",
|
||||
agent=Browser,
|
||||
)
|
||||
|
||||
crew = Crew(agents=[Browser], tasks=[browse])
|
||||
|
||||
crew.kickoff()
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `api_key`: Specifies MultiOn API key. Default is the `MULTION_API_KEY` environment variable.
|
||||
- `local`: Use the local flag set as "true" to run the agent locally on your browser. Make sure the multion browser extension is installed and API Enabled is checked.
|
||||
- `max_steps`: Optional. Set the max_steps the multion agent can take for a command
|
||||
|
||||
## Steps to Get Started
|
||||
To effectively use the `MultiOnTool`, follow these steps:
|
||||
|
||||
1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
|
||||
2. **Install and use MultiOn**: Follow MultiOn documentation for installing the MultiOn Browser Extension (https://docs.multion.ai/learn/browser-extension).
|
||||
3. **Enable API Usage**: Click on the MultiOn extension in the extensions folder of your browser (not the hovering MultiOn icon on the web page) to open the extension configurations. Click the API Enabled toggle to enable the API
|
||||
29
crewai_tools/tools/multion_tool/example.py
Normal file
29
crewai_tools/tools/multion_tool/example.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import os
|
||||
|
||||
from crewai import Agent, Crew, Task
|
||||
from multion_tool import MultiOnTool
|
||||
|
||||
# Example script: wire a MultiOnTool into a single-agent crew and run one task.
# NOTE: both "Your Key" values are placeholders to be replaced before running.
os.environ["OPENAI_API_KEY"] = "Your Key"

multion_browse_tool = MultiOnTool(api_key="Your Key")

# Create a new agent
Browser = Agent(
    role="Browser Agent",
    goal="control web browsers using natural language ",
    backstory="An expert browsing agent.",
    tools=[multion_browse_tool],
    verbose=True,
)

# Define tasks
browse = Task(
    description="Summarize the top 3 trending AI News headlines",
    expected_output="A summary of the top 3 trending AI News headlines",
    agent=Browser,
)


# Assemble the crew from the single agent and task defined above.
crew = Crew(agents=[Browser], tasks=[browse])

# Runs at import time: there is no `if __name__ == "__main__"` guard.
crew.kickoff()
|
||||
80
crewai_tools/tools/multion_tool/multion_tool.py
Normal file
80
crewai_tools/tools/multion_tool/multion_tool.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Multion tool spec."""
|
||||
|
||||
import os
|
||||
from typing import Any, Optional, List
|
||||
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
|
||||
|
||||
class MultiOnTool(BaseTool):
    """Tool to wrap MultiOn Browse Capabilities."""

    name: str = "Multion Browse Tool"
    description: str = (
        "Multion gives the ability for LLMs to control web browsers using natural language instructions.\n"
        "If the status is 'CONTINUE', reissue the same instruction to continue execution\n"
    )
    # MultiOn client instance, created in __init__.
    multion: Optional[Any] = None
    # Session id returned by the last browse call; reused on the next call.
    session_id: Optional[str] = None
    local: bool = False
    max_steps: int = 3
    package_dependencies: List[str] = ["multion"]
    env_vars: List[EnvVar] = [
        EnvVar(name="MULTION_API_KEY", description="API key for Multion", required=True),
    ]

    def __init__(
        self,
        api_key: Optional[str] = None,
        local: bool = False,
        max_steps: int = 3,
        **kwargs,
    ):
        """Create the tool and its MultiOn client.

        Args:
            api_key: MultiOn API key; falls back to the ``MULTION_API_KEY``
                environment variable when not given.
            local: Passed to every browse call as ``local``.
            max_steps: Passed to every browse call as ``max_steps``.
            **kwargs: Forwarded to the parent constructor.

        Raises:
            ImportError: If the ``multion`` package is missing and the user
                declines to install it.
        """
        super().__init__(**kwargs)
        try:
            from multion.client import MultiOn  # type: ignore
        except ImportError:
            import click

            if click.confirm(
                "You are missing the 'multion' package. Would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "multion"], check=True)
                from multion.client import MultiOn
            else:
                raise ImportError(
                    "`multion` package not found, please run `uv add multion`"
                )
        self.session_id = None
        self.local = local
        self.multion = MultiOn(api_key=api_key or os.getenv("MULTION_API_KEY"))
        self.max_steps = max_steps

    def _run(
        self,
        cmd: str,
        *args: Any,
        **kwargs: Any,
    ) -> str:
        """
        Run the Multion client with the given command.

        The session id returned by the client is stored so that the next call
        continues the same session.

        Args:
            cmd (str): The detailed and specific natural language instruction for web browsing

            *args (Any): Additional arguments to pass to the Multion client
            **kwargs (Any): Additional keyword arguments to pass to the Multion client

        Returns:
            The client's message followed by its status.
        """
        # Positional extras are unpacked before the keywords: the call binds
        # the same way, but the ordering is no longer misleading.
        browse = self.multion.browse(
            *args,
            cmd=cmd,
            session_id=self.session_id,
            local=self.local,
            max_steps=self.max_steps,
            **kwargs,
        )
        self.session_id = browse.session_id

        # An f-string tolerates a non-string message/status, where plain
        # concatenation would raise TypeError.
        return f"{browse.message}\n\n STATUS: {browse.status}"
|
||||
56
crewai_tools/tools/mysql_search_tool/README.md
Normal file
56
crewai_tools/tools/mysql_search_tool/README.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# MySQLSearchTool
|
||||
|
||||
## Description
|
||||
This tool is designed to facilitate semantic searches within MySQL database tables. Leveraging RAG (Retrieval-Augmented Generation) technology, the MySQLSearchTool provides users with an efficient means of querying database table content, specifically tailored for MySQL databases. It simplifies the process of finding relevant data through semantic search queries, making it an invaluable resource for users who need to perform advanced queries on extensive datasets within a MySQL database.
|
||||
|
||||
## Installation
|
||||
To install the `crewai_tools` package and utilize the MySQLSearchTool, execute the following command in your terminal:
|
||||
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
Below is an example showcasing how to use the MySQLSearchTool to conduct a semantic search on a table within a MySQL database:
|
||||
|
||||
```python
|
||||
from crewai_tools import MySQLSearchTool
|
||||
|
||||
# Initialize the tool with the database URI and the target table name
|
||||
tool = MySQLSearchTool(db_uri='mysql://user:password@localhost:3306/mydatabase', table_name='employees')
|
||||
|
||||
```
|
||||
|
||||
## Arguments
|
||||
The MySQLSearchTool requires the following arguments for its operation:
|
||||
|
||||
- `db_uri`: A string representing the URI of the MySQL database to be queried. This argument is mandatory and must include the necessary authentication details and the location of the database.
|
||||
- `table_name`: A string specifying the name of the table within the database on which the semantic search will be performed. This argument is mandatory.
|
||||
|
||||
## Custom model and embeddings
|
||||
|
||||
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
|
||||
|
||||
```python
|
||||
tool = MySQLSearchTool(
|
||||
config=dict(
|
||||
llm=dict(
|
||||
provider="ollama", # or google, openai, anthropic, llama2, ...
|
||||
config=dict(
|
||||
model="llama2",
|
||||
# temperature=0.5,
|
||||
# top_p=1,
|
||||
# stream=True,
|
||||
),
|
||||
),
|
||||
embedder=dict(
|
||||
provider="google",
|
||||
config=dict(
|
||||
model="models/embedding-001",
|
||||
task_type="retrieval_document",
|
||||
# title="Embeddings",
|
||||
),
|
||||
),
|
||||
)
|
||||
)
|
||||
```
|
||||
51
crewai_tools/tools/mysql_search_tool/mysql_search_tool.py
Normal file
51
crewai_tools/tools/mysql_search_tool/mysql_search_tool.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from typing import Any, Type
|
||||
|
||||
try:
|
||||
from embedchain.loaders.mysql import MySQLLoader
|
||||
EMBEDCHAIN_AVAILABLE = True
|
||||
except ImportError:
|
||||
EMBEDCHAIN_AVAILABLE = False
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..rag.rag_tool import RagTool
|
||||
|
||||
|
||||
class MySQLSearchToolSchema(BaseModel):
    """Input for MySQLSearchTool."""

    # The single required argument of the tool: the free-text query that
    # MySQLSearchTool._run forwards to the RAG search.
    search_query: str = Field(
        default=...,
        description="Mandatory semantic search query you want to use to search the database's content",
    )
|
||||
|
||||
|
||||
class MySQLSearchTool(RagTool):
    """RAG tool that runs semantic searches over the content of a MySQL table.

    On construction the tool registers ``SELECT * FROM <table_name>;`` as a
    data source, read through embedchain's ``MySQLLoader`` connected to
    ``db_uri``. Queries are then answered by the parent ``RagTool``.
    """

    name: str = "Search a database's table content"
    description: str = "A tool that can be used to semantic search a query from a database table's content."
    args_schema: Type[BaseModel] = MySQLSearchToolSchema
    db_uri: str = Field(..., description="Mandatory database URI")

    def __init__(self, table_name: str, **kwargs):
        """Build the tool and index ``table_name``.

        Args:
            table_name: Name of the table whose rows are indexed. It is
                interpolated verbatim into a SQL statement, so it must come
                from trusted configuration, never from end-user input.
            **kwargs: Model fields for the parent class; must include
                ``db_uri``.

        Raises:
            ImportError: If embedchain (and with it ``MySQLLoader``) could
                not be imported.
        """
        if not EMBEDCHAIN_AVAILABLE:
            raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`")
        super().__init__(**kwargs)
        # The loader and data type are supplied by add(); previously they were
        # written into ``kwargs`` here, after it had already been consumed,
        # and never reached the RAG backend.
        self.add(table_name)
        self.description = f"A tool that can be used to semantic search a query the {table_name} database table's content."
        self._generate_description()

    def add(
        self,
        table_name: str,
        **kwargs: Any,
    ) -> None:
        """Register a table as a data source for semantic search.

        Args:
            table_name: Table to read with ``SELECT * FROM <table_name>;``.
                Interpolated verbatim; trusted input only.
            **kwargs: Extra options forwarded to ``RagTool.add``. ``data_type``
                defaults to ``"mysql"`` and ``loader`` to a ``MySQLLoader``
                for ``self.db_uri``; values passed explicitly take precedence.
        """
        kwargs.setdefault("data_type", "mysql")
        # Build the loader only when the caller did not bring one; this keeps
        # an explicitly passed loader from triggering an unused MySQLLoader.
        if "loader" not in kwargs:
            kwargs["loader"] = MySQLLoader(config=dict(url=self.db_uri))
        super().add(f"SELECT * FROM {table_name};", **kwargs)

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        """Answer ``search_query`` using the parent RAG tool.

        Args:
            search_query: Semantic query to run against the indexed content.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            Whatever ``RagTool._run`` returns for the query.
        """
        return super()._run(query=search_query)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user