From a2212953940ff657835afaed2b56de9f8d6c68c2 Mon Sep 17 00:00:00 2001
From: Tony Kipkemboi
Date: Fri, 8 Aug 2025 16:05:43 -0400
Subject: [PATCH] WIP: docs updates (#3296)

---
 docs/docs.json                                 |  20 ++-
 docs/en/telemetry.mdx                          |   2 +-
 docs/en/tools/automation/overview.mdx          |   4 +
 .../en/tools/automation/zapieractionstool.mdx  |  58 ++++++
 .../database-data/mongodbvectorsearchtool.mdx  | 168 ++++++++++++++++++
 docs/en/tools/database-data/overview.mdx       |   8 +
 .../database-data/singlestoresearchtool.mdx    |  61 +++++++
 docs/en/tools/file-document/ocrtool.mdx        |  89 ++++++++++
 docs/en/tools/file-document/overview.mdx       |   8 +
 .../file-document/pdf-text-writing-tool.mdx    |  76 ++++++++
 .../tools/search-research/arxivpapertool.mdx   | 112 ++++++++++++
 .../tools/search-research/bravesearchtool.mdx  |   2 +-
 .../search-research/databricks-query-tool.mdx  |  80 +++++++++
 .../search-research/githubsearchtool.mdx       |   2 +
 docs/en/tools/search-research/overview.mdx     |  12 ++
 .../serpapi-googlesearchtool.mdx               |  65 +++++++
 .../serpapi-googleshoppingtool.mdx             |  61 +++++++
 .../tools/search-research/serperdevtool.mdx    |   2 +-
 .../search-research/tavilyextractortool.mdx    |   2 +-
 .../search-research/tavilysearchtool.mdx       |   2 +
 .../tools/web-scraping/brightdata-tools.mdx    | 111 ++++++++++++
 docs/en/tools/web-scraping/overview.mdx        |   4 +
 22 files changed, 940 insertions(+), 9 deletions(-)
 create mode 100644 docs/en/tools/automation/zapieractionstool.mdx
 create mode 100644 docs/en/tools/database-data/mongodbvectorsearchtool.mdx
 create mode 100644 docs/en/tools/database-data/singlestoresearchtool.mdx
 create mode 100644 docs/en/tools/file-document/ocrtool.mdx
 create mode 100644 docs/en/tools/file-document/pdf-text-writing-tool.mdx
 create mode 100644 docs/en/tools/search-research/arxivpapertool.mdx
 create mode 100644 docs/en/tools/search-research/databricks-query-tool.mdx
 create mode 100644 docs/en/tools/search-research/serpapi-googlesearchtool.mdx
 create mode 100644 docs/en/tools/search-research/serpapi-googleshoppingtool.mdx
 create mode 100644 docs/en/tools/web-scraping/brightdata-tools.mdx

diff --git a/docs/docs.json b/docs/docs.json
index 0a6d80329..681618ea1 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -128,7 +128,9 @@
         "en/tools/file-document/jsonsearchtool",
         "en/tools/file-document/csvsearchtool",
         "en/tools/file-document/directorysearchtool",
-        "en/tools/file-document/directoryreadtool"
+        "en/tools/file-document/directoryreadtool",
+        "en/tools/file-document/ocrtool",
+        "en/tools/file-document/pdf-text-writing-tool"
       ]
     },
     {
@@ -146,7 +148,8 @@
         "en/tools/web-scraping/stagehandtool",
         "en/tools/web-scraping/firecrawlcrawlwebsitetool",
         "en/tools/web-scraping/firecrawlscrapewebsitetool",
-        "en/tools/web-scraping/oxylabsscraperstool"
+        "en/tools/web-scraping/oxylabsscraperstool",
+        "en/tools/web-scraping/brightdata-tools"
       ]
     },
     {
@@ -163,7 +166,11 @@
         "en/tools/search-research/youtubechannelsearchtool",
         "en/tools/search-research/youtubevideosearchtool",
         "en/tools/search-research/tavilysearchtool",
-        "en/tools/search-research/tavilyextractortool"
+        "en/tools/search-research/tavilyextractortool",
+        "en/tools/search-research/arxivpapertool",
+        "en/tools/search-research/serpapi-googlesearchtool",
+        "en/tools/search-research/serpapi-googleshoppingtool",
+        "en/tools/search-research/databricks-query-tool"
       ]
     },
     {
@@ -175,7 +182,9 @@
         "en/tools/database-data/snowflakesearchtool",
         "en/tools/database-data/nl2sqltool",
         "en/tools/database-data/qdrantvectorsearchtool",
-        "en/tools/database-data/weaviatevectorsearchtool"
+        "en/tools/database-data/weaviatevectorsearchtool",
"en/tools/database-data/weaviatevectorsearchtool", + "en/tools/database-data/mongodbvectorsearchtool", + "en/tools/database-data/singlestoresearchtool" ] }, { @@ -207,7 +216,8 @@ "en/tools/automation/overview", "en/tools/automation/apifyactorstool", "en/tools/automation/composiotool", - "en/tools/automation/multiontool" + "en/tools/automation/multiontool", + "en/tools/automation/zapieractionstool" ] } ] diff --git a/docs/en/telemetry.mdx b/docs/en/telemetry.mdx index 35f713860..afa1fcaa0 100644 --- a/docs/en/telemetry.mdx +++ b/docs/en/telemetry.mdx @@ -35,7 +35,7 @@ os.environ['OTEL_SDK_DISABLED'] = 'true' ### Data Explanation: | Defaulted | Data | Reason and Specifics | -|-----------|-------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| +|:----------|:------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------| | Yes | CrewAI and Python Version | Tracks software versions. Example: CrewAI v1.2.3, Python 3.8.10. No personal data. | | Yes | Crew Metadata | Includes: randomly generated key and ID, process type (e.g., 'sequential', 'parallel'), boolean flag for memory usage (true/false), count of tasks, count of agents. All non-personal. | | Yes | Agent Data | Includes: randomly generated key and ID, role name (should not include personal info), boolean settings (verbose, delegation enabled, code execution allowed), max iterations, max RPM, max retry limit, LLM info (see LLM Attributes), list of tool names (should not include personal info). No personal data. | diff --git a/docs/en/tools/automation/overview.mdx b/docs/en/tools/automation/overview.mdx index a189e0639..a85ac9d6c 100644 --- a/docs/en/tools/automation/overview.mdx +++ b/docs/en/tools/automation/overview.mdx @@ -20,6 +20,10 @@ These tools enable your agents to automate workflows, integrate with external pl Automate browser interactions and web-based workflows. + + + Expose Zapier Actions as CrewAI tools for automation across thousands of apps. + ## **Common Use Cases** diff --git a/docs/en/tools/automation/zapieractionstool.mdx b/docs/en/tools/automation/zapieractionstool.mdx new file mode 100644 index 000000000..378a1b6e3 --- /dev/null +++ b/docs/en/tools/automation/zapieractionstool.mdx @@ -0,0 +1,58 @@ +--- +title: Zapier Actions Tool +description: The `ZapierActionsAdapter` exposes Zapier actions as CrewAI tools for automation. +icon: bolt +--- + +# `ZapierActionsAdapter` + +## Description + +Use the Zapier adapter to list and call Zapier actions as CrewAI tools. This enables agents to trigger automations across thousands of apps. + +## Installation + +This adapter is included with `crewai-tools`. No extra install required. + +## Environment Variables + +- `ZAPIER_API_KEY` (required): Zapier API key. Get one from the Zapier Actions dashboard at https://actions.zapier.com/ (create an account, then generate an API key). You can also pass `zapier_api_key` directly when constructing the adapter. 
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools.adapters.zapier_adapter import ZapierActionsAdapter
+
+adapter = ZapierActionsAdapter(api_key="your_zapier_api_key")
+tools = adapter.tools()
+
+agent = Agent(
+    role="Automator",
+    goal="Execute Zapier actions",
+    backstory="Automation specialist",
+    tools=tools,
+    verbose=True,
+)
+
+task = Task(
+    description="Create a new Google Sheet and add a row using Zapier actions",
+    expected_output="Confirmation with created resource IDs",
+    agent=agent,
+)
+
+crew = Crew(agents=[agent], tasks=[task])
+result = crew.kickoff()
+```
+
+## Notes & limits
+
+- The adapter lists the actions available to your key and creates `BaseTool` wrappers dynamically.
+- Handle action‑specific required fields in your task instructions or tool call.
+- Rate limits depend on your Zapier plan; see the Zapier Actions docs.
+
diff --git a/docs/en/tools/database-data/mongodbvectorsearchtool.mdx b/docs/en/tools/database-data/mongodbvectorsearchtool.mdx
new file mode 100644
index 000000000..35d0fe0f1
--- /dev/null
+++ b/docs/en/tools/database-data/mongodbvectorsearchtool.mdx
@@ -0,0 +1,168 @@
+---
+title: MongoDB Vector Search Tool
+description: The `MongoDBVectorSearchTool` performs vector search on MongoDB Atlas with optional indexing helpers.
+icon: "leaf"
+---
+
+# `MongoDBVectorSearchTool`
+
+## Description
+
+Perform vector similarity queries on MongoDB Atlas collections. The tool also provides index-creation helpers and bulk insertion of embedded texts.
+
+MongoDB Atlas supports native vector search. Learn more:
+https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/
+
+## Installation
+
+Install with the MongoDB extra:
+
+```shell
+pip install crewai-tools[mongodb]
+```
+
+or
+
+```shell
+uv add crewai-tools --extra mongodb
+```
+
+## Parameters
+
+### Initialization
+
+- `connection_string` (str, required)
+- `database_name` (str, required)
+- `collection_name` (str, required)
+- `vector_index_name` (str, default `vector_index`)
+- `text_key` (str, default `text`)
+- `embedding_key` (str, default `embedding`)
+- `dimensions` (int, default `1536`)
+
+### Run Parameters
+
+- `query` (str, required): Natural language query to embed and search.
+
+## Quick start
+
+```python Code
+from crewai_tools import MongoDBVectorSearchTool
+
+tool = MongoDBVectorSearchTool(
+    connection_string="mongodb+srv://...",
+    database_name="mydb",
+    collection_name="docs",
+)
+
+print(tool.run(query="how to create vector index"))
+```
+
+## Index creation helpers
+
+Use `create_vector_search_index(...)` to provision an Atlas Vector Search index with the correct dimensions and similarity metric; see the indexing examples below.
+
+## Common issues
+
+- Authentication failures: ensure your Atlas IP Access List allows your runner and the connection string includes credentials.
+- Index not found: create the vector index first; its name must match `vector_index_name`.
+- Dimensions mismatch: align the embedding model's dimensions with `dimensions` (see the sketch below).
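+
+For example, if you embed with a 3072-dimensional model such as `text-embedding-3-large` (mentioned in the indexing example below), a minimal sketch of keeping the collection and index in sync; the model and dimension count are illustrative:
+
+```python Code
+from crewai_tools import MongoDBVectorSearchTool
+
+EMBEDDING_DIMENSIONS = 3072  # must match the embedding model's output size
+
+tool = MongoDBVectorSearchTool(
+    connection_string="mongodb+srv://...",
+    database_name="mydb",
+    collection_name="docs",
+    dimensions=EMBEDDING_DIMENSIONS,
+)
+
+# Provision the Atlas index with the same dimensionality.
+tool.create_vector_search_index(dimensions=EMBEDDING_DIMENSIONS)
+```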
+
+## More examples
+
+### Basic initialization
+
+```python Code
+from crewai_tools import MongoDBVectorSearchTool
+
+tool = MongoDBVectorSearchTool(
+    database_name="example_database",
+    collection_name="example_collection",
+    connection_string="mongodb+srv://...",
+)
+```
+
+### Custom query configuration
+
+```python Code
+from crewai import Agent
+from crewai_tools import MongoDBVectorSearchConfig, MongoDBVectorSearchTool
+
+query_config = MongoDBVectorSearchConfig(limit=10, oversampling_factor=2)
+tool = MongoDBVectorSearchTool(
+    database_name="example_database",
+    collection_name="example_collection",
+    connection_string="mongodb+srv://...",
+    query_config=query_config,
+    vector_index_name="my_vector_index",
+)
+
+rag_agent = Agent(
+    name="rag_agent",
+    role="You are a helpful assistant that can answer questions with the help of the MongoDBVectorSearchTool.",
+    goal="...",
+    backstory="...",
+    tools=[tool],
+)
+```
+
+### Preloading the database and creating the index
+
+```python Code
+import os
+from crewai_tools import MongoDBVectorSearchTool
+
+tool = MongoDBVectorSearchTool(
+    database_name="example_database",
+    collection_name="example_collection",
+    connection_string="mongodb+srv://...",
+)
+
+# Load text content from a local folder and add it to MongoDB
+texts = []
+for fname in os.listdir("knowledge"):
+    path = os.path.join("knowledge", fname)
+    if os.path.isfile(path):
+        with open(path, "r", encoding="utf-8") as f:
+            texts.append(f.read())
+
+tool.add_texts(texts)
+
+# Create the Atlas Vector Search index (e.g., 3072 dims for text-embedding-3-large)
+tool.create_vector_search_index(dimensions=3072)
+```
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import MongoDBVectorSearchTool
+
+tool = MongoDBVectorSearchTool(
+    connection_string="mongodb+srv://...",
+    database_name="mydb",
+    collection_name="docs",
+)
+
+agent = Agent(
+    role="RAG Agent",
+    goal="Answer using MongoDB vector search",
+    backstory="Knowledge retrieval specialist",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="Find relevant content for 'indexing guidance'",
+    expected_output="A concise answer citing the most relevant matches",
+    agent=agent,
+)
+
+crew = Crew(
+    agents=[agent],
+    tasks=[task],
+    verbose=True,
+)
+
+result = crew.kickoff()
+```
+
diff --git a/docs/en/tools/database-data/overview.mdx b/docs/en/tools/database-data/overview.mdx
index c63529790..50522af5e 100644
--- a/docs/en/tools/database-data/overview.mdx
+++ b/docs/en/tools/database-data/overview.mdx
@@ -32,6 +32,14 @@ These tools enable your agents to interact with various database systems, from t
     Perform semantic search with Weaviate vector database.
   </Card>
+
+  <Card title="MongoDB Vector Search Tool" icon="leaf" href="/en/tools/database-data/mongodbvectorsearchtool">
+    Vector similarity search on MongoDB Atlas with indexing helpers.
+  </Card>
+
+  <Card title="SingleStore Search Tool" icon="circle" href="/en/tools/database-data/singlestoresearchtool">
+    Safe SELECT/SHOW queries on SingleStore with pooling and validation.
+  </Card>
 </CardGroup>

 ## **Common Use Cases**

diff --git a/docs/en/tools/database-data/singlestoresearchtool.mdx b/docs/en/tools/database-data/singlestoresearchtool.mdx
new file mode 100644
index 000000000..30c6b7786
--- /dev/null
+++ b/docs/en/tools/database-data/singlestoresearchtool.mdx
@@ -0,0 +1,61 @@
+---
+title: SingleStore Search Tool
+description: The `SingleStoreSearchTool` safely executes SELECT/SHOW queries on SingleStore with pooling.
+icon: circle
+---
+
+# `SingleStoreSearchTool`
+
+## Description
+
+Execute read‑only queries (`SELECT`/`SHOW`) against SingleStore with connection pooling and input validation.
+
+## Installation
+
+```shell
+uv add crewai-tools[singlestore]
+```
+
+## Environment Variables
+
+You can set `SINGLESTOREDB_HOST`, `SINGLESTOREDB_USER`, `SINGLESTOREDB_PASSWORD`, and related variables individually, or provide `SINGLESTOREDB_URL` as a single DSN.
+
+Generate an API key from the SingleStore dashboard; see the [Management API docs](https://docs.singlestore.com/cloud/reference/management-api/#generate-an-api-key).
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import SingleStoreSearchTool
+
+tool = SingleStoreSearchTool(
+    tables=["products"],
+    host="host",
+    user="user",
+    password="pass",
+    database="db",
+)
+
+agent = Agent(
+    role="Analyst",
+    goal="Query SingleStore",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="List 5 products",
+    expected_output="5 rows as JSON/text",
+    agent=agent,
+)
+
+crew = Crew(
+    agents=[agent],
+    tasks=[task],
+    verbose=True,
+)
+
+result = crew.kickoff()
+```
+
diff --git a/docs/en/tools/file-document/ocrtool.mdx b/docs/en/tools/file-document/ocrtool.mdx
new file mode 100644
index 000000000..b07e2897e
--- /dev/null
+++ b/docs/en/tools/file-document/ocrtool.mdx
@@ -0,0 +1,89 @@
+---
+title: OCR Tool
+description: The `OCRTool` extracts text from local images or image URLs using an LLM with vision.
+icon: image
+---
+
+# `OCRTool`
+
+## Description
+
+Extract text from images (local path or URL). Uses a vision‑capable LLM via CrewAI's LLM interface.
+
+## Installation
+
+No extra install beyond `crewai-tools`. Ensure your selected LLM supports vision.
+
+## Parameters
+
+### Run Parameters
+
+- `image_path_url` (str, required): Local image path or HTTP(S) URL.
+
+## Examples
+
+### Direct usage
+
+```python Code
+from crewai_tools import OCRTool
+
+print(OCRTool().run(image_path_url="/tmp/receipt.png"))
+```
+
+### With an agent
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import OCRTool
+
+ocr = OCRTool()
+
+agent = Agent(
+    role="OCR Specialist",
+    goal="Extract text from images",
+    backstory="Vision‑enabled analyst",
+    tools=[ocr],
+    verbose=True,
+)
+
+task = Task(
+    description="Extract text from https://example.com/invoice.jpg",
+    expected_output="All detected text in plain text",
+    agent=agent,
+)
+
+crew = Crew(agents=[agent], tasks=[task])
+result = crew.kickoff()
+```
+
+## Notes
+
+- Ensure the selected LLM supports image inputs.
+- For large images, consider downscaling to reduce token usage.
+- You can pass a specific LLM instance to the tool (e.g., `LLM(model="gpt-4o")`) if needed.
+
diff --git a/docs/en/tools/file-document/overview.mdx b/docs/en/tools/file-document/overview.mdx
index b0053b1ca..9e3a83b7b 100644
--- a/docs/en/tools/file-document/overview.mdx
+++ b/docs/en/tools/file-document/overview.mdx
@@ -52,6 +52,14 @@ These tools enable your agents to work with various file formats and document ty
     Read and list directory contents, file structures, and metadata.
   </Card>
+
+  <Card title="OCR Tool" icon="image" href="/en/tools/file-document/ocrtool">
+    Extract text from images (local files or URLs) using a vision‑capable LLM.
+  </Card>
+
+  <Card title="PDF Text Writing Tool" icon="file-pdf" href="/en/tools/file-document/pdf-text-writing-tool">
+    Write text at specific coordinates in PDFs, with optional custom fonts.
+  </Card>
 </CardGroup>

 ## **Common Use Cases**

diff --git a/docs/en/tools/file-document/pdf-text-writing-tool.mdx b/docs/en/tools/file-document/pdf-text-writing-tool.mdx
new file mode 100644
index 000000000..edbd1575c
--- /dev/null
+++ b/docs/en/tools/file-document/pdf-text-writing-tool.mdx
@@ -0,0 +1,76 @@
+---
+title: PDF Text Writing Tool
+description: The `PDFTextWritingTool` writes text to specific positions in a PDF, supporting custom fonts.
+icon: file-pdf
+---
+
+# `PDFTextWritingTool`
+
+## Description
+
+Write text at precise coordinates on a PDF page, optionally embedding a custom TrueType font.
+
+## Parameters
+
+### Run Parameters
+
+- `pdf_path` (str, required): Path to the input PDF.
+- `text` (str, required): Text to add.
+- `position` (tuple[int, int], required): `(x, y)` coordinates.
+- `font_size` (int, default `12`)
+- `font_color` (str, default `"0 0 0 rg"`)
+- `font_name` (str, default `"F1"`)
+- `font_file` (str, optional): Path to a `.ttf` file.
+- `page_number` (int, default `0`)
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import PDFTextWritingTool
+
+tool = PDFTextWritingTool()
+
+agent = Agent(
+    role="PDF Editor",
+    goal="Annotate PDFs",
+    backstory="Documentation specialist",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="Write 'CONFIDENTIAL' at (72, 720) on page 1 of ./sample.pdf",
+    expected_output="Confirmation message",
+    agent=agent,
+)
+
+crew = Crew(
+    agents=[agent],
+    tasks=[task],
+    verbose=True,
+)
+
+result = crew.kickoff()
+```
+
+### Direct usage
+
+```python Code
+from crewai_tools import PDFTextWritingTool
+
+PDFTextWritingTool().run(
+    pdf_path="./input.pdf",
+    text="CONFIDENTIAL",
+    position=(72, 720),
+    font_size=18,
+    page_number=0,
+)
+```
+
+## Tips
+
+- The coordinate origin is the bottom‑left corner of the page.
+- If using a custom font (`font_file`), ensure it is a valid `.ttf` file.
+
diff --git a/docs/en/tools/search-research/arxivpapertool.mdx b/docs/en/tools/search-research/arxivpapertool.mdx
new file mode 100644
index 000000000..ce428fdda
--- /dev/null
+++ b/docs/en/tools/search-research/arxivpapertool.mdx
@@ -0,0 +1,112 @@
+---
+title: Arxiv Paper Tool
+description: The `ArxivPaperTool` searches arXiv for papers matching a query and optionally downloads PDFs.
+icon: box-archive
+---
+
+# `ArxivPaperTool`
+
+## Description
+
+The `ArxivPaperTool` queries the arXiv API for academic papers and returns compact, readable results. It can also optionally download PDFs to disk.
+
+## Installation
+
+This tool has no special installation beyond `crewai-tools`.
+
+```shell
+uv add crewai-tools
+```
+
+No API key is required; the tool uses the public arXiv Atom API.
+
+## Steps to Get Started
+
+1. Initialize the tool.
+2. Provide a `search_query` (e.g., "transformer neural network").
+3. Optionally set `max_results` (1–100) and enable PDF downloads in the constructor.
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import ArxivPaperTool
+
+tool = ArxivPaperTool(
+    download_pdfs=False,
+    save_dir="./arxiv_pdfs",
+    use_title_as_filename=True,
+)
+
+agent = Agent(
+    role="Researcher",
+    goal="Find relevant arXiv papers",
+    backstory="Expert at literature discovery",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="Search arXiv for 'transformer neural network' and list top 5 results.",
+    expected_output="A concise list of 5 relevant papers with titles, links, and summaries.",
+    agent=agent,
+)
+
+crew = Crew(agents=[agent], tasks=[task])
+result = crew.kickoff()
+```
+
+### Direct usage (without an agent)
+
+```python Code
+from crewai_tools import ArxivPaperTool
+
+tool = ArxivPaperTool(
+    download_pdfs=True,
+    save_dir="./arxiv_pdfs",
+)
+print(tool.run(search_query="mixture of experts", max_results=3))
+```
+
+## Parameters
+
+### Initialization Parameters
+
+- `download_pdfs` (bool, default `False`): Whether to download PDFs.
+- `save_dir` (str, default `./arxiv_pdfs`): Directory to save PDFs.
+- `use_title_as_filename` (bool, default `False`): Use paper titles for filenames.
+
+### Run Parameters
+
+- `search_query` (str, required): The arXiv search query.
+- `max_results` (int, default `5`, range 1–100): Number of results.
+
+## Output format
+
+The tool returns a human‑readable list of papers with:
+
+- Title
+- Link (abs page)
+- Snippet/summary (truncated)
+
+When `download_pdfs=True`, PDFs are saved to `save_dir` and the summary mentions the saved files.
+
+## Troubleshooting
+
+The tool surfaces network, XML parsing, and file system errors with informative messages:
+
+- Network timeouts: retry, or reduce `max_results`.
+- Invalid XML errors indicate an arXiv response parsing issue; try a simpler query.
+- File system errors (e.g., permission denied) can occur when saving PDFs; ensure `save_dir` is writable.
+
+## Related links
+
+- arXiv API docs: https://info.arxiv.org/help/api/index.html
+
diff --git a/docs/en/tools/search-research/bravesearchtool.mdx b/docs/en/tools/search-research/bravesearchtool.mdx
index 03147a6f8..b651b2a58 100644
--- a/docs/en/tools/search-research/bravesearchtool.mdx
+++ b/docs/en/tools/search-research/bravesearchtool.mdx
@@ -23,7 +23,7 @@ pip install 'crewai[tools]'
 To effectively use the `BraveSearchTool`, follow these steps:
 
 1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
-2. **API Key Acquisition**: Acquire a Brave Search API key by registering at [Brave Search API](https://api.search.brave.com/app/keys).
+2. **API Key Acquisition**: Acquire a Brave Search API key at https://api.search.brave.com/app/keys (sign in to generate a key).
 3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool.
 
 ## Example

diff --git a/docs/en/tools/search-research/databricks-query-tool.mdx b/docs/en/tools/search-research/databricks-query-tool.mdx
new file mode 100644
index 000000000..5b471b452
--- /dev/null
+++ b/docs/en/tools/search-research/databricks-query-tool.mdx
@@ -0,0 +1,80 @@
+---
+title: Databricks SQL Query Tool
+description: The `DatabricksQueryTool` executes SQL queries against Databricks workspace tables.
+icon: trowel-bricks
+---
+
+# `DatabricksQueryTool`
+
+## Description
+
+Run SQL against Databricks workspace tables, authenticating with either a CLI profile or a direct host/token pair.
+
+## Installation
+
+```shell
+uv add crewai-tools[databricks-sdk]
+```
+
+## Environment Variables
+
+- `DATABRICKS_CONFIG_PROFILE` or (`DATABRICKS_HOST` + `DATABRICKS_TOKEN`)
+
+Create a personal access token and find host details in the Databricks workspace under User Settings → Developer.
+Docs: https://docs.databricks.com/en/dev-tools/auth/pat.html
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import DatabricksQueryTool
+
+tool = DatabricksQueryTool(
+    default_catalog="main",
+    default_schema="default",
+)
+
+agent = Agent(
+    role="Data Analyst",
+    goal="Query Databricks",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="SELECT * FROM my_table LIMIT 10",
+    expected_output="10 rows",
+    agent=agent,
+)
+
+crew = Crew(
+    agents=[agent],
+    tasks=[task],
+    verbose=True,
+)
+result = crew.kickoff()
+
+print(result)
+```
+
+## Parameters
+
+- `query` (required): SQL query to execute
+- `catalog` (optional): Override the default catalog
+- `db_schema` (optional): Override the default schema
+- `warehouse_id` (optional): Override the default SQL warehouse
+- `row_limit` (optional): Maximum rows to return (default: 1000)
+
+## Defaults on initialization
+
+- `default_catalog`
+- `default_schema`
+- `default_warehouse_id`
+
+## Error handling & tips
+
+- Authentication errors: verify that `DATABRICKS_HOST` begins with `https://` and the token is valid.
+- Permissions: ensure your token can access the SQL warehouse and schema.
+- Limits: avoid long‑running queries in agent loops; add filters and limits.
+
diff --git a/docs/en/tools/search-research/githubsearchtool.mdx b/docs/en/tools/search-research/githubsearchtool.mdx
index 80b363a3c..84a1ee9b3 100644
--- a/docs/en/tools/search-research/githubsearchtool.mdx
+++ b/docs/en/tools/search-research/githubsearchtool.mdx
@@ -24,6 +24,8 @@ pip install 'crewai[tools]'
 
 This command installs the necessary package to run the GithubSearchTool along with any other tools included in the crewai_tools package.
 
+Get a GitHub Personal Access Token at https://github.com/settings/tokens (Developer settings → Fine‑grained tokens or classic tokens).
+
 ## Example
 
 Here's how you can use the GithubSearchTool to perform semantic searches within a GitHub repository:

diff --git a/docs/en/tools/search-research/overview.mdx b/docs/en/tools/search-research/overview.mdx
index 6390fba53..2ccd696fb 100644
--- a/docs/en/tools/search-research/overview.mdx
+++ b/docs/en/tools/search-research/overview.mdx
@@ -52,6 +52,12 @@ These tools enable your agents to search the web, research topics, and find info
     Extract structured content from web pages using the Tavily API.
   </Card>
+
+  <Card title="Arxiv Paper Tool" icon="box-archive" href="/en/tools/search-research/arxivpapertool">
+    Search arXiv and optionally download PDFs.
+  </Card>
+
+  <Card title="SerpApi Google Search Tool" icon="google" href="/en/tools/search-research/serpapi-googlesearchtool">
+    Google search via SerpApi with structured results.
+  </Card>
+
+  <Card title="SerpApi Google Shopping Tool" icon="cart-shopping" href="/en/tools/search-research/serpapi-googleshoppingtool">
+    Google Shopping queries via SerpApi.
+  </Card>
 </CardGroup>

 ## **Common Use Cases**

diff --git a/docs/en/tools/search-research/serpapi-googlesearchtool.mdx b/docs/en/tools/search-research/serpapi-googlesearchtool.mdx
new file mode 100644
index 000000000..94474e274
--- /dev/null
+++ b/docs/en/tools/search-research/serpapi-googlesearchtool.mdx
@@ -0,0 +1,65 @@
+---
+title: SerpApi Google Search Tool
+description: The `SerpApiGoogleSearchTool` performs Google searches using the SerpApi service.
+icon: google
+---
+
+# `SerpApiGoogleSearchTool`
+
+## Description
+
+Use the `SerpApiGoogleSearchTool` to run Google searches with SerpApi and retrieve structured results. Requires a SerpApi API key.
+
+## Installation
+
+```shell
+uv add crewai-tools[serpapi]
+```
+
+## Environment Variables
+
+- `SERPAPI_API_KEY` (required): API key for SerpApi. Create one at https://serpapi.com/ (free tier available).
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import SerpApiGoogleSearchTool
+
+tool = SerpApiGoogleSearchTool()
+
+agent = Agent(
+    role="Researcher",
+    goal="Answer questions using Google search",
+    backstory="Search specialist",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="Search for the latest CrewAI releases",
+    expected_output="A concise list of relevant results with titles and links",
+    agent=agent,
+)
+
+crew = Crew(agents=[agent], tasks=[task])
+result = crew.kickoff()
+```
+
+## Parameters
+
+### Run Parameters
+
+- `search_query` (str, required): The Google query.
+- `location` (str, optional): Geographic location parameter.
+
+## Notes
+
+- This tool wraps SerpApi and returns structured search results.
+- See also Google Shopping via SerpApi: `/en/tools/search-research/serpapi-googleshoppingtool`
+
diff --git a/docs/en/tools/search-research/serpapi-googleshoppingtool.mdx b/docs/en/tools/search-research/serpapi-googleshoppingtool.mdx
new file mode 100644
index 000000000..e7b745b25
--- /dev/null
+++ b/docs/en/tools/search-research/serpapi-googleshoppingtool.mdx
@@ -0,0 +1,61 @@
+---
+title: SerpApi Google Shopping Tool
+description: The `SerpApiGoogleShoppingTool` searches Google Shopping results using SerpApi.
+icon: cart-shopping
+---
+
+# `SerpApiGoogleShoppingTool`
+
+## Description
+
+Leverage the `SerpApiGoogleShoppingTool` to query Google Shopping via SerpApi and retrieve product-oriented results.
+
+## Installation
+
+```shell
+uv add crewai-tools[serpapi]
+```
+
+## Environment Variables
+
+- `SERPAPI_API_KEY` (required): API key for SerpApi. Create one at https://serpapi.com/ (free tier available).
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import SerpApiGoogleShoppingTool
+
+tool = SerpApiGoogleShoppingTool()
+
+agent = Agent(
+    role="Shopping Researcher",
+    goal="Find relevant products",
+    backstory="Expert in product search",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="Search Google Shopping for 'wireless noise-canceling headphones'",
+    expected_output="Top relevant products with titles and links",
+    agent=agent,
+)
+
+crew = Crew(agents=[agent], tasks=[task])
+result = crew.kickoff()
+```
+
+## Notes
+
+- See also Google Web Search via SerpApi: `/en/tools/search-research/serpapi-googlesearchtool`
+
+## Parameters
+
+### Run Parameters
+
+- `search_query` (str, required): Product search query.
+- `location` (str, optional): Geographic location parameter.
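+
+## Direct usage
+
+The agent example above covers the common case; for a quick check outside a crew, a minimal sketch (assuming `run` accepts the run parameters listed above):
+
+```python Code
+from crewai_tools import SerpApiGoogleShoppingTool
+
+tool = SerpApiGoogleShoppingTool()
+
+# `search_query` and `location` mirror the documented run parameters;
+# the location value here is illustrative.
+print(tool.run(search_query="wireless noise-canceling headphones", location="United States"))
+```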
+
diff --git a/docs/en/tools/search-research/serperdevtool.mdx b/docs/en/tools/search-research/serperdevtool.mdx
index 9c3e52b20..7a179934e 100644
--- a/docs/en/tools/search-research/serperdevtool.mdx
+++ b/docs/en/tools/search-research/serperdevtool.mdx
@@ -16,7 +16,7 @@ to fetch and display the most relevant search results based on the query provide
 To effectively use the `SerperDevTool`, follow these steps:
 
 1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
-2. **API Key Acquisition**: Acquire a `serper.dev` API key by registering for a free account at `serper.dev`.
+2. **API Key Acquisition**: Acquire a `serper.dev` API key at https://serper.dev/ (free tier available).
 3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPER_API_KEY` to facilitate its use by the tool.
 
 To incorporate this tool into your project, follow the installation instructions below:

diff --git a/docs/en/tools/search-research/tavilyextractortool.mdx b/docs/en/tools/search-research/tavilyextractortool.mdx
index a8dacd445..8f979831d 100644
--- a/docs/en/tools/search-research/tavilyextractortool.mdx
+++ b/docs/en/tools/search-research/tavilyextractortool.mdx
@@ -1,7 +1,7 @@
 ---
 title: "Tavily Extractor Tool"
 description: "Extract structured content from web pages using the Tavily API"
-icon: "file-text"
+icon: square-poll-horizontal
 ---
 
 The `TavilyExtractorTool` allows CrewAI agents to extract structured content from web pages using the Tavily API. It can process single URLs or lists of URLs and provides options for controlling the extraction depth and including images.

diff --git a/docs/en/tools/search-research/tavilysearchtool.mdx b/docs/en/tools/search-research/tavilysearchtool.mdx
index d78784b53..1c6bdd2c4 100644
--- a/docs/en/tools/search-research/tavilysearchtool.mdx
+++ b/docs/en/tools/search-research/tavilysearchtool.mdx
@@ -22,6 +22,8 @@ Ensure your Tavily API key is set as an environment variable:
 export TAVILY_API_KEY='your_tavily_api_key'
 ```
 
+Get an API key at https://app.tavily.com/ (sign up, then create a key).
+
 ## Example Usage
 
 Here's how to initialize and use the `TavilySearchTool` within a CrewAI agent:

diff --git a/docs/en/tools/web-scraping/brightdata-tools.mdx b/docs/en/tools/web-scraping/brightdata-tools.mdx
new file mode 100644
index 000000000..a2e9799f8
--- /dev/null
+++ b/docs/en/tools/web-scraping/brightdata-tools.mdx
@@ -0,0 +1,111 @@
+---
+title: Bright Data Tools
+description: Bright Data integrations for SERP search, Web Unlocker scraping, and Dataset API.
+icon: spider
+---
+
+# Bright Data Tools
+
+This set of tools integrates Bright Data services for web extraction.
+
+## Installation
+
+```shell
+uv add crewai-tools requests aiohttp
+```
+
+## Environment Variables
+
+- `BRIGHT_DATA_API_KEY` (required)
+- `BRIGHT_DATA_ZONE` (for SERP/Web Unlocker)
+
+Create credentials at https://brightdata.com/ (sign up, then create an API token and zone).
+See the developer docs: https://developers.brightdata.com/
+
+## Included Tools
+
+- `BrightDataSearchTool`: SERP search (Google/Bing/Yandex) with geo/language/device options.
+- `BrightDataWebUnlockerTool`: Scrape pages with anti-bot bypass and rendering.
+- `BrightDataDatasetTool`: Run Dataset API jobs and fetch results.
+
+## Examples
+
+### SERP Search
+
+```python Code
+from crewai_tools import BrightDataSearchTool
+
+tool = BrightDataSearchTool(
+    query="CrewAI",
+    country="us",
+)
+
+print(tool.run())
+```
+
+### Web Unlocker
+
+```python Code
+from crewai_tools import BrightDataWebUnlockerTool
+
+tool = BrightDataWebUnlockerTool(
+    url="https://example.com",
+    format="markdown",
+)
+
+print(tool.run(url="https://example.com"))
+```
+
+### Dataset API
+
+```python Code
+from crewai_tools import BrightDataDatasetTool
+
+tool = BrightDataDatasetTool(
+    dataset_type="ecommerce",
+    url="https://example.com/product",
+)
+
+print(tool.run())
+```
+
+## Troubleshooting
+
+- 401/403: verify `BRIGHT_DATA_API_KEY` and `BRIGHT_DATA_ZONE`.
+- Empty/blocked content: enable rendering or try a different zone.
+
+## Example
+
+```python Code
+from crewai import Agent, Task, Crew
+from crewai_tools import BrightDataSearchTool
+
+tool = BrightDataSearchTool(
+    query="CrewAI",
+    country="us",
+)
+
+agent = Agent(
+    role="Web Researcher",
+    goal="Search with Bright Data",
+    backstory="Finds reliable results",
+    tools=[tool],
+    verbose=True,
+)
+
+task = Task(
+    description="Search for CrewAI and summarize top results",
+    expected_output="Short summary with links",
+    agent=agent,
+)
+
+crew = Crew(
+    agents=[agent],
+    tasks=[task],
+    verbose=True,
+)
+
+result = crew.kickoff()
+```
+
diff --git a/docs/en/tools/web-scraping/overview.mdx b/docs/en/tools/web-scraping/overview.mdx
index 85ab01b5f..d5ae7d784 100644
--- a/docs/en/tools/web-scraping/overview.mdx
+++ b/docs/en/tools/web-scraping/overview.mdx
@@ -60,6 +60,10 @@ These tools enable your agents to interact with the web, extract data from websi
     Access web data at scale with Oxylabs.
   </Card>
+
+  <Card title="Bright Data Tools" icon="spider" href="/en/tools/web-scraping/brightdata-tools">
+    SERP search, Web Unlocker, and Dataset API integrations.
+  </Card>
 </CardGroup>

 ## **Common Use Cases**