Merge branch 'main' into vision-tool-improvement

This commit is contained in:
Mike Plachta
2025-01-03 09:36:13 -08:00
committed by GitHub
59 changed files with 2007 additions and 392 deletions

View File

@@ -13,7 +13,7 @@ In the realm of CrewAI agents, tools are pivotal for enhancing functionality. Th
<h3>
[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb)
[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) | [Discourse](https://community.crewai.com/)
</h3>
@@ -51,7 +51,7 @@ There are three ways to create tools for crewAI agents:
### Subclassing `BaseTool`
```python
from crewai_tools import BaseTool
from crewai.tools import BaseTool
class MyCustomTool(BaseTool):
name: str = "Name of my tool"
@@ -70,7 +70,7 @@ Define a new class inheriting from `BaseTool`, specifying `name`, `description`,
For a simpler approach, create a `Tool` object directly with the required attributes and a functional logic.
```python
from crewai_tools import tool
from crewai.tools import tool
@tool("Name of my tool")
def my_tool(question: str) -> str:
"""Clear description for what this tool is useful for, you agent will need this information to use it."""
@@ -140,6 +140,4 @@ Thank you for your interest in enhancing the capabilities of AI agents through a
## Contact
For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb) or open an issue in this repository.
For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb), [Discourse](https://community.crewai.com/) or open an issue in this repository.

View File

@@ -1,4 +1,5 @@
from .tools import (
BraveSearchTool,
BrowserbaseLoadTool,
CodeDocsSearchTool,
CodeInterpreterTool,
@@ -16,14 +17,18 @@ from .tools import (
FirecrawlSearchTool,
GithubSearchTool,
JSONSearchTool,
LinkupSearchTool,
LlamaIndexTool,
MDXSearchTool,
MultiOnTool,
MySQLSearchTool,
NL2SQLTool,
PDFSearchTool,
PGSearchTool,
RagTool,
ScrapeElementFromWebsiteTool,
ScrapegraphScrapeTool,
ScrapegraphScrapeToolSchema,
ScrapeWebsiteTool,
ScrapflyScrapeWebsiteTool,
SeleniumScrapingTool,
@@ -40,6 +45,7 @@ from .tools import (
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool,
MySQLSearchTool
WeaviateVectorSearchTool,
SerpApiGoogleSearchTool,
SerpApiGoogleShoppingTool,
)
from .tools.base_tool import BaseTool, Tool, tool

View File

@@ -1,3 +1,4 @@
from .brave_search_tool.brave_search_tool import BraveSearchTool
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
@@ -11,27 +12,30 @@ from .exa_tools.exa_search_tool import EXASearchTool
from .file_read_tool.file_read_tool import FileReadTool
from .file_writer_tool.file_writer_tool import FileWriterTool
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
FirecrawlCrawlWebsiteTool
FirecrawlCrawlWebsiteTool,
)
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
FirecrawlScrapeWebsiteTool
FirecrawlScrapeWebsiteTool,
)
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
from .github_search_tool.github_search_tool import GithubSearchTool
from .json_search_tool.json_search_tool import JSONSearchTool
from .linkup.linkup_search_tool import LinkupSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
from .multion_tool.multion_tool import MultiOnTool
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
from .nl2sql.nl2sql_tool import NL2SQLTool
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
from .pg_seach_tool.pg_search_tool import PGSearchTool
from .rag.rag_tool import RagTool
from .scrape_element_from_website.scrape_element_from_website import (
ScrapeElementFromWebsiteTool
ScrapeElementFromWebsiteTool,
)
from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import ScrapegraphScrapeTool, ScrapegraphScrapeToolSchema
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
ScrapflyScrapeWebsiteTool
ScrapflyScrapeWebsiteTool,
)
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
@@ -46,7 +50,9 @@ from .vision_tool.vision_tool import VisionTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
YoutubeChannelSearchTool
YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
from .weaviate_tool.vector_search import WeaviateVectorSearchTool
from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool
from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool

View File

@@ -1,59 +0,0 @@
from typing import Any, Callable
from pydantic import BaseModel as PydanticBaseModel
from crewai.tools.base_tool import BaseTool
from crewai.tools.structured_tool import CrewStructuredTool
class Tool(BaseTool):
func: Callable
"""The function that will be executed when the tool is called."""
def _run(self, *args: Any, **kwargs: Any) -> Any:
return self.func(*args, **kwargs)
def to_langchain(
tools: list[BaseTool | CrewStructuredTool],
) -> list[CrewStructuredTool]:
return [t.to_structured_tool() if isinstance(t, BaseTool) else t for t in tools]
def tool(*args):
"""
Decorator to create a tool from a function.
"""
def _make_with_name(tool_name: str) -> Callable:
def _make_tool(f: Callable) -> BaseTool:
if f.__doc__ is None:
raise ValueError("Function must have a docstring")
if f.__annotations__ is None:
raise ValueError("Function must have type annotations")
class_name = "".join(tool_name.split()).title()
args_schema = type(
class_name,
(PydanticBaseModel,),
{
"__annotations__": {
k: v for k, v in f.__annotations__.items() if k != "return"
},
},
)
return Tool(
name=tool_name,
description=f.__doc__,
func=f,
args_schema=args_schema,
)
return _make_tool
if len(args) == 1 and callable(args[0]):
return _make_with_name(args[0].__name__)(args[0])
if len(args) == 1 and isinstance(args[0], str):
return _make_with_name(args[0])
raise ValueError("Invalid arguments")

View File

@@ -0,0 +1,30 @@
# BraveSearchTool Documentation
## Description
This tool is designed to perform a web search for a specified query across the internet. It utilizes the Brave Web Search API, a REST API that queries Brave Search and returns web results. The following sections describe how to set up requests, including parameters and headers, to the Brave Web Search API and get a JSON response back.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:
```python
from crewai_tools import BraveSearchTool
# Initialize the tool for internet searching capabilities
tool = BraveSearchTool()
```
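Once the `BRAVE_API_KEY` environment variable is set (see the steps below), the tool can also be invoked directly. A minimal sketch, assuming `run` forwards keyword arguments to `_run` as with other crewAI tools:
```python
import os

from crewai_tools import BraveSearchTool

os.environ["BRAVE_API_KEY"] = "your-api-key"  # placeholder; required at construction time

# Limit the number of results returned per query
tool = BraveSearchTool(n_results=5)

# Returns a newline-separated list of titles, links and snippets
print(tool.run(search_query="latest advances in AI agents"))
```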
## Steps to Get Started
To effectively use the `BraveSearchTool`, follow these steps:
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire an API key [here](https://api.search.brave.com/app/keys).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool.
## Conclusion
By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.

View File

@@ -0,0 +1,118 @@
import datetime
import os
import time
from typing import Any, ClassVar, Optional, Type
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
def _save_results_to_file(content: str) -> None:
"""Saves the search results to a file."""
filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
with open(filename, "w") as file:
file.write(content)
print(f"Results saved to {filename}")
class BraveSearchToolSchema(BaseModel):
"""Input for BraveSearchTool."""
search_query: str = Field(
..., description="Mandatory search query you want to use to search the internet"
)
class BraveSearchTool(BaseTool):
"""
BraveSearchTool - A tool for performing web searches using the Brave Search API.
This module provides functionality to search the internet using Brave's Search API,
supporting customizable result counts and country-specific searches.
Dependencies:
- requests
- pydantic
- python-dotenv (for API key management)
"""
name: str = "Brave Web Search the internet"
description: str = (
"A tool that can be used to search the internet with a search_query."
)
args_schema: Type[BaseModel] = BraveSearchToolSchema
search_url: str = "https://api.search.brave.com/res/v1/web/search"
country: Optional[str] = ""
n_results: int = 10
save_file: bool = False
_last_request_time: ClassVar[float] = 0
_min_request_interval: ClassVar[float] = 1.0 # seconds
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if "BRAVE_API_KEY" not in os.environ:
raise ValueError(
"BRAVE_API_KEY environment variable is required for BraveSearchTool"
)
def _run(
self,
**kwargs: Any,
) -> Any:
current_time = time.time()
if (current_time - self._last_request_time) < self._min_request_interval:
time.sleep(
self._min_request_interval - (current_time - self._last_request_time)
)
BraveSearchTool._last_request_time = time.time()
try:
search_query = kwargs.get("search_query") or kwargs.get("query")
if not search_query:
raise ValueError("Search query is required")
save_file = kwargs.get("save_file", self.save_file)
n_results = kwargs.get("n_results", self.n_results)
payload = {"q": search_query, "count": n_results}
if self.country != "":
payload["country"] = self.country
headers = {
"X-Subscription-Token": os.environ["BRAVE_API_KEY"],
"Accept": "application/json",
}
response = requests.get(self.search_url, headers=headers, params=payload)
response.raise_for_status() # Handle non-200 responses
results = response.json()
if "web" in results:
results = results["web"]["results"]
string = []
for result in results:
try:
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {result['url']}",
f"Snippet: {result['description']}",
"---",
]
)
)
except KeyError:
continue
content = "\n".join(string)
except requests.RequestException as e:
return f"Error performing search: {str(e)}"
except KeyError as e:
return f"Error parsing search results: {str(e)}"
if save_file:
_save_results_to_file(content)
return f"\nSearch results: {content}\n"
else:
return content

View File

@@ -1,8 +1,8 @@
import os
from typing import Any, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
class BrowserbaseLoadToolSchema(BaseModel):
@@ -15,8 +15,8 @@ class BrowserbaseLoadTool(BaseTool):
"Load webpages url in a headless browser using Browserbase and return the contents"
)
args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
api_key: Optional[str] = None
project_id: Optional[str] = None
api_key: Optional[str] = os.getenv('BROWSERBASE_API_KEY')
project_id: Optional[str] = os.getenv('BROWSERBASE_PROJECT_ID')
text_content: Optional[bool] = False
session_id: Optional[str] = None
proxy: Optional[bool] = None
@@ -32,6 +32,8 @@ class BrowserbaseLoadTool(BaseTool):
**kwargs,
):
super().__init__(**kwargs)
if not self.api_key:
raise EnvironmentError("BROWSERBASE_API_KEY environment variable is required for initialization")
try:
from browserbase import Browserbase # type: ignore
except ImportError:
@@ -39,7 +41,7 @@ class BrowserbaseLoadTool(BaseTool):
"`browserbase` package not found, please run `pip install browserbase`"
)
self.browserbase = Browserbase(api_key, project_id)
self.browserbase = Browserbase(api_key=self.api_key)
self.text_content = text_content
self.session_id = session_id
self.proxy = proxy

View File

@@ -32,7 +32,7 @@ Note: Substitute 'https://docs.example.com/reference' with your target documenta
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
```python
tool = YoutubeVideoSearchTool(
tool = CodeDocsSearchTool(
config=dict(
llm=dict(
provider="ollama", # or google, openai, anthropic, llama2, ...

View File

@@ -38,3 +38,16 @@ Agent(
tools=[CodeInterpreterTool(user_dockerfile_path="<Dockerfile_path>")],
)
```
If the tool cannot connect to the Docker daemon automatically (a common issue on macOS), you can set the Docker host manually:
```python
from crewai_tools import CodeInterpreterTool
Agent(
...
tools=[CodeInterpreterTool(user_docker_base_url="<Docker Host Base Url>",
user_dockerfile_path="<Dockerfile_path>")],
)
```
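For reference, a hedged example of what the Docker host value might look like; the socket path is an assumption that depends on your Docker installation (check `docker context ls`):
```python
from crewai_tools import CodeInterpreterTool

# "unix:///var/run/docker.sock" is the default daemon socket on most Linux installs;
# Docker Desktop on macOS may expose a different path.
tool = CodeInterpreterTool(user_docker_base_url="unix:///var/run/docker.sock")
```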

View File

@@ -2,11 +2,12 @@ import importlib.util
import os
from typing import List, Optional, Type
import docker
from docker import from_env as docker_from_env
from docker.models.containers import Container
from docker.errors import ImageNotFound, NotFound
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class CodeInterpreterSchema(BaseModel):
"""Input for CodeInterpreterTool."""
@@ -29,6 +30,7 @@ class CodeInterpreterTool(BaseTool):
default_image_tag: str = "code-interpreter:latest"
code: Optional[str] = None
user_dockerfile_path: Optional[str] = None
user_docker_base_url: Optional[str] = None
unsafe_mode: bool = False
@staticmethod
@@ -40,12 +42,13 @@ class CodeInterpreterTool(BaseTool):
"""
Verify if the Docker image is available. Optionally use a user-provided Dockerfile.
"""
client = docker.from_env()
client = docker_from_env() if self.user_docker_base_url == None else docker.DockerClient(base_url=self.user_docker_base_url)
try:
client.images.get(self.default_image_tag)
except docker.errors.ImageNotFound:
except ImageNotFound:
if self.user_dockerfile_path and os.path.exists(self.user_dockerfile_path):
dockerfile_path = self.user_dockerfile_path
else:
@@ -74,17 +77,17 @@ class CodeInterpreterTool(BaseTool):
return self.run_code_in_docker(code, libraries_used)
def _install_libraries(
self, container: docker.models.containers.Container, libraries: List[str]
self, container: Container, libraries: List[str]
) -> None:
"""
Install missing libraries in the Docker container
"""
for library in libraries:
container.exec_run(f"pip install {library}")
container.exec_run(["pip", "install", library])
def _init_docker_container(self) -> docker.models.containers.Container:
def _init_docker_container(self) -> Container:
container_name = "code-interpreter"
client = docker.from_env()
client = docker_from_env()
current_path = os.getcwd()
# Check if the container is already running
@@ -92,7 +95,7 @@ class CodeInterpreterTool(BaseTool):
existing_container = client.containers.get(container_name)
existing_container.stop()
existing_container.remove()
except docker.errors.NotFound:
except NotFound:
pass # Container does not exist, no need to remove
return client.containers.run(
@@ -109,8 +112,7 @@ class CodeInterpreterTool(BaseTool):
container = self._init_docker_container()
self._install_libraries(container, libraries_used)
cmd_to_run = f'python3 -c "{code}"'
exec_result = container.exec_run(cmd_to_run)
exec_result = container.exec_run(["python3", "-c", code])
container.stop()
container.remove()

View File

@@ -5,8 +5,7 @@ Composio tools wrapper.
import typing as t
import typing_extensions as te
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
class ComposioTool(BaseTool):

View File

@@ -1,11 +1,10 @@
import json
from typing import Type
from crewai.tools import BaseTool
from openai import OpenAI
from pydantic import BaseModel
from crewai_tools.tools.base_tool import BaseTool
class ImagePromptSchema(BaseModel):
"""Input for Dall-E Tool."""

View File

@@ -1,10 +1,9 @@
import os
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedDirectoryReadToolSchema(BaseModel):
"""Input for DirectoryReadTool."""

View File

@@ -1,10 +1,8 @@
import os
from typing import Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class EXABaseToolToolSchema(BaseModel):
"""Input for EXABaseTool."""
@@ -28,10 +26,10 @@ class EXABaseTool(BaseTool):
}
def _parse_results(self, results):
stirng = []
string = []
for result in results:
try:
stirng.append(
string.append(
"\n".join(
[
f"Title: {result['title']}",
@@ -43,7 +41,7 @@ class EXABaseTool(BaseTool):
)
)
except KeyError:
next
continue
content = "\n".join(stirng)
content = "\n".join(string)
return f"\nSearch results: {content}\n"

View File

@@ -1,28 +1,30 @@
import os
import requests
from typing import Any
import requests
from .exa_base_tool import EXABaseTool
class EXASearchTool(EXABaseTool):
def _run(
self,
**kwargs: Any,
) -> Any:
search_query = kwargs.get('search_query')
if search_query is None:
search_query = kwargs.get('query')
def _run(
self,
**kwargs: Any,
) -> Any:
search_query = kwargs.get("search_query")
if search_query is None:
search_query = kwargs.get("query")
payload = {
"query": search_query,
"type": "magic",
}
payload = {
"query": search_query,
"type": "magic",
}
headers = self.headers.copy()
headers["x-api-key"] = os.environ['EXA_API_KEY']
headers = self.headers.copy()
headers["x-api-key"] = os.environ["EXA_API_KEY"]
response = requests.post(self.search_url, json=payload, headers=headers)
results = response.json()
if 'results' in results:
results = super()._parse_results(results['results'])
return results
response = requests.post(self.search_url, json=payload, headers=headers)
results = response.json()
if "results" in results:
results = super()._parse_results(results["results"])
return results

View File

@@ -1,43 +1,81 @@
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedFileReadToolSchema(BaseModel):
"""Input for FileReadTool."""
pass
class FileReadToolSchema(FixedFileReadToolSchema):
class FileReadToolSchema(BaseModel):
"""Input for FileReadTool."""
file_path: str = Field(..., description="Mandatory file full path to read the file")
class FileReadTool(BaseTool):
"""A tool for reading file contents.
This tool inherits its schema handling from BaseTool to avoid recursive schema
definition issues. The args_schema is set to FileReadToolSchema which defines
the required file_path parameter. The schema should not be overridden in the
constructor as it would break the inheritance chain and cause infinite loops.
The tool supports two ways of specifying the file path:
1. At construction time via the file_path parameter
2. At runtime via the file_path parameter in the tool's input
Args:
file_path (Optional[str]): Path to the file to be read. If provided,
this becomes the default file path for the tool.
**kwargs: Additional keyword arguments passed to BaseTool.
Example:
>>> tool = FileReadTool(file_path="/path/to/file.txt")
>>> content = tool.run() # Reads /path/to/file.txt
>>> content = tool.run(file_path="/path/to/other.txt") # Reads other.txt
"""
name: str = "Read a file's content"
description: str = "A tool that can be used to read a file's content."
description: str = "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read."
args_schema: Type[BaseModel] = FileReadToolSchema
file_path: Optional[str] = None
def __init__(self, file_path: Optional[str] = None, **kwargs):
def __init__(self, file_path: Optional[str] = None, **kwargs: Any) -> None:
"""Initialize the FileReadTool.
Args:
file_path (Optional[str]): Path to the file to be read. If provided,
this becomes the default file path for the tool.
**kwargs: Additional keyword arguments passed to BaseTool.
"""
super().__init__(**kwargs)
if file_path is not None:
self.file_path = file_path
self.description = f"A tool that can be used to read {file_path}'s content."
self.args_schema = FixedFileReadToolSchema
self._generate_description()
self.description = f"A tool that reads file content. The default file is {file_path}, but you can provide a different 'file_path' parameter to read another file."
def _run(
self,
**kwargs: Any,
) -> Any:
) -> str:
file_path = kwargs.get("file_path", self.file_path)
if file_path is None:
return "Error: No file path provided. Please provide a file path either in the constructor or as an argument."
try:
file_path = kwargs.get("file_path", self.file_path)
with open(file_path, "r") as file:
return file.read()
except FileNotFoundError:
return f"Error: File not found at path: {file_path}"
except PermissionError:
return f"Error: Permission denied when trying to read file: {file_path}"
except Exception as e:
return f"Fail to read the file {file_path}. Error: {e}"
return f"Error: Failed to read file {file_path}. {str(e)}"
def _generate_description(self) -> None:
"""Generate the tool description based on file path.
This method updates the tool's description to include information about
the default file path while maintaining the ability to specify a different
file at runtime.
Returns:
None
"""
self.description = f"A tool that can be used to read {self.file_path}'s content."

View File

@@ -1,16 +1,18 @@
import os
from typing import Any, Optional, Type
from pydantic import BaseModel
from ..base_tool import BaseTool
from distutils.util import strtobool
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel
class FileWriterToolInput(BaseModel):
filename: str
filename: str
directory: Optional[str] = "./"
overwrite: str = "False"
content: str
class FileWriterTool(BaseTool):
name: str = "File Writer Tool"
description: str = (
@@ -26,7 +28,7 @@ class FileWriterTool(BaseTool):
# Construct the full path
filepath = os.path.join(kwargs.get("directory") or "", kwargs["filename"])
# Convert overwrite to boolean
kwargs["overwrite"] = bool(strtobool(kwargs["overwrite"]))
@@ -46,4 +48,4 @@ class FileWriterTool(BaseTool):
except KeyError as e:
return f"An error occurred while accessing key: {str(e)}"
except Exception as e:
return f"An error occurred while writing to the file: {str(e)}"
return f"An error occurred while writing to the file: {str(e)}"

View File

@@ -1,8 +1,9 @@
import os
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
# Type checking import
if TYPE_CHECKING:
@@ -11,22 +12,33 @@ if TYPE_CHECKING:
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
crawler_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for crawling"
)
page_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for page"
)
class FirecrawlCrawlWebsiteTool(BaseTool):
model_config = ConfigDict(
arbitrary_types_allowed=True, validate_assignment=True, frozen=False
)
name: str = "Firecrawl web crawl tool"
description: str = "Crawl webpages using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
firecrawl_app: Optional["FirecrawlApp"] = None
api_key: Optional[str] = None
firecrawl: Optional["FirecrawlApp"] = None
url: Optional[str] = None
params: Optional[Dict[str, Any]] = None
poll_interval: Optional[int] = 2
idempotency_key: Optional[str] = None
def __init__(self, api_key: Optional[str] = None, **kwargs):
"""Initialize FirecrawlCrawlWebsiteTool.
Args:
api_key (Optional[str]): Firecrawl API key. If not provided, will check FIRECRAWL_API_KEY env var.
url (Optional[str]): Base URL to crawl. Can be overridden by the _run method.
firecrawl_app (Optional[FirecrawlApp]): Previously created FirecrawlApp instance.
params (Optional[Dict[str, Any]]): Additional parameters to pass to the FirecrawlApp.
poll_interval (Optional[int]): Poll interval for the FirecrawlApp.
idempotency_key (Optional[str]): Idempotency key for the FirecrawlApp.
**kwargs: Additional arguments passed to BaseTool.
"""
super().__init__(**kwargs)
try:
from firecrawl import FirecrawlApp # type: ignore
@@ -35,18 +47,37 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
self.firecrawl = FirecrawlApp(api_key=api_key)
# Allows passing a previously created FirecrawlApp instance
# or builds a new one with the provided API key
if not self.firecrawl_app:
client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY")
if not client_api_key:
raise ValueError(
"FIRECRAWL_API_KEY is not set. Please provide it either via the constructor "
"with the `api_key` argument or by setting the FIRECRAWL_API_KEY environment variable."
)
self.firecrawl_app = FirecrawlApp(api_key=client_api_key)
def _run(
self,
url: str,
crawler_options: Optional[Dict[str, Any]] = None,
page_options: Optional[Dict[str, Any]] = None,
):
if crawler_options is None:
crawler_options = {}
if page_options is None:
page_options = {}
def _run(self, url: str):
# Unless url has been previously set via constructor by the user,
# use the url argument provided by the agent at runtime.
base_url = self.url or url
options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
return self.firecrawl.crawl_url(url, options)
return self.firecrawl_app.crawl_url(
base_url,
params=self.params,
poll_interval=self.poll_interval,
idempotency_key=self.idempotency_key
)
try:
from firecrawl import FirecrawlApp
# Must rebuild model after class is defined
FirecrawlCrawlWebsiteTool.model_rebuild()
except ImportError:
"""
If this tool is not used, this exception can be safely ignored.
"""
pass

View File

@@ -1,8 +1,7 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field
# Type checking import
if TYPE_CHECKING:
@@ -24,6 +23,9 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel):
class FirecrawlScrapeWebsiteTool(BaseTool):
model_config = ConfigDict(
arbitrary_types_allowed=True, validate_assignment=True, frozen=False
)
name: str = "Firecrawl web scrape tool"
description: str = "Scrape webpages url using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
@@ -61,3 +63,15 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
"timeout": timeout,
}
return self.firecrawl.scrape_url(url, options)
try:
from firecrawl import FirecrawlApp
# Must rebuild model after class is defined
FirecrawlScrapeWebsiteTool.model_rebuild()
except ImportError:
"""
If this tool is not used, this exception can be safely ignored.
"""
pass

View File

@@ -1,9 +1,8 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
# Type checking import
if TYPE_CHECKING:
from firecrawl import FirecrawlApp

View File

@@ -0,0 +1,38 @@
# JinaScrapeWebsiteTool
## Description
A tool designed to extract and read the content of a specified website by using Jina.ai reader. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
## Installation
Install the crewai_tools package
```shell
pip install 'crewai[tools]'
```
## Example
```python
from crewai_tools import JinaScrapeWebsiteTool
# To enable scraping any website it finds during its execution
tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY')
# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
tool = JinaScrapeWebsiteTool(website_url='https://www.example.com')
# With custom headers
tool = JinaScrapeWebsiteTool(
website_url='https://www.example.com',
custom_headers={'X-Target-Selector': 'body, .class, #id'}
)
```
## Authentication
The tool uses Jina.ai's reader service. While it can work without an API key, Jina.ai may apply rate limiting or blocking to unauthenticated requests. For production use, it's recommended to provide an API key.
## Arguments
- `website_url`: Mandatory URL of the website to scrape. This is the primary input for the tool, specifying which website's content should be scraped and read.
- `api_key`: Optional Jina.ai API key for authenticated access to the reader service.
- `custom_headers`: Optional dictionary of HTTP headers to use when making requests.
## Note
This tool is an alternative to the standard `ScrapeWebsiteTool` that specifically uses Jina.ai's reader service for enhanced content extraction. Choose this tool when you need more sophisticated content parsing capabilities.
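As a rough usage sketch (assuming `run` forwards the `website_url` argument to `_run`, as with other crewAI tools):
```python
from crewai_tools import JinaScrapeWebsiteTool

tool = JinaScrapeWebsiteTool(api_key="YOUR_API_KEY")  # placeholder key

# Returns the page content rendered as markdown via Jina.ai's reader endpoint
markdown = tool.run(website_url="https://www.example.com")
print(markdown[:500])
```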

View File

@@ -0,0 +1,54 @@
from typing import Optional, Type
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
class JinaScrapeWebsiteToolInput(BaseModel):
"""Input schema for JinaScrapeWebsiteTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
class JinaScrapeWebsiteTool(BaseTool):
name: str = "JinaScrapeWebsiteTool"
description: str = (
"A tool that can be used to read a website content using Jina.ai reader and return markdown content."
)
args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput
website_url: Optional[str] = None
api_key: Optional[str] = None
headers: dict = {}
def __init__(
self,
website_url: Optional[str] = None,
api_key: Optional[str] = None,
custom_headers: Optional[dict] = None,
**kwargs,
):
super().__init__(**kwargs)
if website_url is not None:
self.website_url = website_url
self.description = f"A tool that can be used to read {website_url}'s content and return markdown content."
self._generate_description()
if custom_headers is not None:
self.headers = custom_headers
if api_key is not None:
self.headers["Authorization"] = f"Bearer {api_key}"
def _run(self, website_url: Optional[str] = None) -> str:
url = website_url or self.website_url
if not url:
raise ValueError(
"Website URL must be provided either during initialization or execution"
)
response = requests.get(
f"https://r.jina.ai/{url}", headers=self.headers, timeout=15
)
response.raise_for_status()
return response.text

View File

@@ -0,0 +1,98 @@
# Linkup Search Tool
## Description
The `LinkupSearchTool` is a tool designed for integration with the CrewAI framework. It provides the ability to query the Linkup API for contextual information and retrieve structured results. This tool is ideal for enriching workflows with up-to-date and reliable information from Linkup.
---
## Features
- Perform API queries to the Linkup platform using customizable parameters (`query`, `depth`, `output_type`).
- Gracefully handles API errors and provides structured feedback.
- Returns well-structured results for seamless integration into CrewAI processes.
---
## Installation
### Prerequisites
- Linkup API Key
### Steps
1. ```shell
pip install 'crewai[tools]'
```
2. Create a `.env` file in your project root and add your Linkup API Key:
```plaintext
LINKUP_API_KEY=your_linkup_api_key
```
---
## Usage
### Basic Example
Here is how to use the `LinkupSearchTool` in a CrewAI project:
1. **Import and Initialize**:
```python
from tools.linkup_tools import LinkupSearchTool
import os
from dotenv import load_dotenv
load_dotenv()
linkup_tool = LinkupSearchTool(api_key=os.getenv("LINKUP_API_KEY"))
```
2. **Set Up an Agent and Task**:
```python
from crewai import Agent, Task, Crew
# Define the agent
research_agent = Agent(
role="Information Researcher",
goal="Fetch relevant results from Linkup.",
backstory="An expert in online information retrieval...",
tools=[linkup_tool],
verbose=True
)
# Define the task
search_task = Task(
expected_output="A detailed list of Nobel Prize-winning women in physics with their achievements.",
description="Search for women who have won the Nobel Prize in Physics.",
agent=research_agent
)
# Create and run the crew
crew = Crew(
agents=[research_agent],
tasks=[search_task]
)
result = crew.kickoff()
print(result)
```
### Advanced Configuration
You can customize the parameters for the `LinkupSearchTool`:
- `query`: The search term or phrase.
- `depth`: The search depth (`"standard"` by default).
- `output_type`: The type of output (`"searchResults"` by default).
Example:
```python
response = linkup_tool._run(
query="Women Nobel Prize Physics",
depth="standard",
output_type="searchResults"
)
```

Binary file not shown (new image, 32 KiB).

View File

@@ -0,0 +1,49 @@
from typing import Any
try:
from linkup import LinkupClient
LINKUP_AVAILABLE = True
except ImportError:
LINKUP_AVAILABLE = False
LinkupClient = Any # type placeholder when package is not available
from pydantic import PrivateAttr
class LinkupSearchTool:
name: str = "Linkup Search Tool"
description: str = "Performs an API call to Linkup to retrieve contextual information."
_client: LinkupClient = PrivateAttr() # type: ignore
def __init__(self, api_key: str):
"""
Initialize the tool with an API key.
"""
if not LINKUP_AVAILABLE:
raise ImportError(
"The 'linkup' package is required to use the LinkupSearchTool. "
"Please install it with: uv add linkup"
)
self._client = LinkupClient(api_key=api_key)
def _run(self, query: str, depth: str = "standard", output_type: str = "searchResults") -> dict:
"""
Executes a search using the Linkup API.
:param query: The query to search for.
:param depth: Search depth (default is "standard").
:param output_type: Desired result type (default is "searchResults").
:return: A dictionary containing the results or an error message.
"""
try:
response = self._client.search(
query=query,
depth=depth,
output_type=output_type
)
results = [
{"name": result.name, "url": result.url, "content": result.content}
for result in response.results
]
return {"success": True, "results": results}
except Exception as e:
return {"success": False, "error": str(e)}

View File

@@ -1,9 +1,8 @@
from typing import Any, Optional, Type, cast
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class LlamaIndexTool(BaseTool):
"""Tool to wrap LlamaIndex tools/query engines."""
@@ -19,6 +18,10 @@ class LlamaIndexTool(BaseTool):
from llama_index.core.tools import BaseTool as LlamaBaseTool
tool = cast(LlamaBaseTool, self.llama_index_tool)
if self.result_as_answer:
return tool(*args, **kwargs).content
return tool(*args, **kwargs)
@classmethod

View File

@@ -41,7 +41,7 @@ crew.kickoff()
## Arguments
- `api_key`: Specifies Browserbase API key. Defaults is the `BROWSERBASE_API_KEY` environment variable.
- `api_key`: Specifies MultiOn API key. Default is the `MULTION_API_KEY` environment variable.
- `local`: Use the local flag set as "true" to run the agent locally on your browser. Make sure the multion browser extension is installed and API Enabled is checked.
- `max_steps`: Optional. Set the max_steps the multion agent can take for a command
@@ -51,4 +51,3 @@ To effectively use the `MultiOnTool`, follow these steps:
1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **Install and use MultiOn**: Follow MultiOn documentation for installing the MultiOn Browser Extension (https://docs.multion.ai/learn/browser-extension).
3. **Enable API Usage**: Click on the MultiOn extension in the extensions folder of your browser (not the hovering MultiOn icon on the web page) to open the extension configurations. Click the API Enabled toggle to enable the API

View File

@@ -2,7 +2,7 @@
from typing import Any, Optional
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
class MultiOnTool(BaseTool):

View File

@@ -2,9 +2,9 @@
## Description
This tool is used to convert natural language to SQL queries. When passsed to the agent it will generate queries and then use them to interact with the database.
This tool is used to convert natural language to SQL queries. When passed to the agent it will generate queries and then use them to interact with the database.
This enables multiple workflows like having an Agent to access the database fetch information based on the goal and then use the information to generate a response, report or any other output. Along with that proivdes the ability for the Agent to update the database based on its goal.
This enables multiple workflows like having an Agent to access the database fetch information based on the goal and then use the information to generate a response, report or any other output. Along with that provides the ability for the Agent to update the database based on its goal.
**Attention**: Make sure that the Agent has access to a Read-Replica, or that it is okay for the Agent to run insert/update queries on the database.
@@ -23,7 +23,6 @@ pip install 'crewai[tools]'
In order to use the NL2SQLTool, you need to pass the database URI to the tool. The URI should be in the format `dialect+driver://username:password@host:port/database`.
```python
from crewai_tools import NL2SQLTool
@@ -43,7 +42,7 @@ def researcher(self) -> Agent:
The primary task goal was:
"Retrieve the average, maximum, and minimum monthly revenue for each city, but only include cities that have more than one user. Also, count the number of user in each city and sort the results by the average monthly revenue in descending order"
"Retrieve the average, maximum, and minimum monthly revenue for each city, but only include cities that have more than one user. Also, count the number of users in each city and sort the results by the average monthly revenue in descending order"
So the Agent tried to get information from the DB; the first query was wrong, so the Agent tried again, got the correct information, and passed it to the next agent.
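The usage snippet above is truncated by the diff. For context, a minimal hedged sketch; the `db_uri` parameter name and the agent wiring are assumptions based on the usage this README describes:
```python
from crewai import Agent
from crewai_tools import NL2SQLTool

# URI format: dialect+driver://username:password@host:port/database (placeholder credentials)
nl2sql = NL2SQLTool(db_uri="postgresql://user:password@localhost:5432/mydb")

analyst = Agent(
    role="Data Analyst",
    goal="Answer questions by querying the database",
    backstory="An analyst who translates business questions into SQL.",
    tools=[nl2sql],
)
```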

View File

@@ -1,11 +1,10 @@
from typing import Any, Union
from typing import Any, Type, Union
from ..base_tool import BaseTool
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from typing import Type, Any
class NL2SQLToolInput(BaseModel):
sql_query: str = Field(
@@ -13,6 +12,7 @@ class NL2SQLToolInput(BaseModel):
description="The SQL query to execute.",
)
class NL2SQLTool(BaseTool):
name: str = "NL2SQLTool"
description: str = "Converts natural language to SQL queries and executes them."

View File

@@ -1,10 +1,9 @@
from abc import ABC, abstractmethod
from typing import Any
from crewai.tools import BaseTool
from pydantic import BaseModel, Field, model_validator
from crewai_tools.tools.base_tool import BaseTool
class Adapter(BaseModel, ABC):
class Config:

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
import requests
from bs4 import BeautifulSoup
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
"""Input for ScrapeElementFromWebsiteTool."""

View File

@@ -1,12 +1,12 @@
import os
import re
from typing import Any, Optional, Type
import requests
from bs4 import BeautifulSoup
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedScrapeWebsiteToolSchema(BaseModel):
"""Input for ScrapeWebsiteTool."""
@@ -67,7 +67,7 @@ class ScrapeWebsiteTool(BaseTool):
page.encoding = page.apparent_encoding
parsed = BeautifulSoup(page.text, "html.parser")
text = parsed.get_text()
text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
text = " ".join([i for i in text.split(" ") if i.strip() != ""])
text = parsed.get_text(" ")
text = re.sub("[ \t]+", " ", text)
text = re.sub("\\s+\n\\s+", "\n", text)
return text

View File

@@ -0,0 +1,84 @@
# ScrapegraphScrapeTool
## Description
A tool that leverages Scrapegraph AI's SmartScraper API to intelligently extract content from websites. This tool provides advanced web scraping capabilities with AI-powered content extraction, making it ideal for targeted data collection and content analysis tasks.
## Installation
Install the required packages:
```shell
pip install 'crewai[tools]'
```
## Example Usage
### Basic Usage
```python
from crewai_tools import ScrapegraphScrapeTool
# Basic usage with API key
tool = ScrapegraphScrapeTool(api_key="your_api_key")
result = tool.run(
website_url="https://www.example.com",
user_prompt="Extract the main heading and summary"
)
```
### Fixed Website URL
```python
# Initialize with a fixed website URL
tool = ScrapegraphScrapeTool(
website_url="https://www.example.com",
api_key="your_api_key"
)
result = tool.run()
```
### Custom Prompt
```python
# With custom prompt
tool = ScrapegraphScrapeTool(
api_key="your_api_key",
user_prompt="Extract all product prices and descriptions"
)
result = tool.run(website_url="https://www.example.com")
```
### Error Handling
```python
try:
tool = ScrapegraphScrapeTool(api_key="your_api_key")
result = tool.run(
website_url="https://www.example.com",
user_prompt="Extract the main heading"
)
except ValueError as e:
print(f"Configuration error: {e}") # Handles invalid URLs or missing API keys
except RuntimeError as e:
print(f"Scraping error: {e}") # Handles API or network errors
```
## Arguments
- `website_url`: The URL of the website to scrape (required if not set during initialization)
- `user_prompt`: Custom instructions for content extraction (optional)
- `api_key`: Your Scrapegraph API key (required, can be set via SCRAPEGRAPH_API_KEY environment variable)
## Environment Variables
- `SCRAPEGRAPH_API_KEY`: Your Scrapegraph API key, you can obtain one [here](https://scrapegraphai.com)
## Rate Limiting
The Scrapegraph API has rate limits that vary based on your subscription plan. Consider the following best practices:
- Implement appropriate delays between requests when processing multiple URLs
- Handle rate limit errors gracefully in your application
- Check your API plan limits on the Scrapegraph dashboard
## Error Handling
The tool may raise the following exceptions:
- `ValueError`: When API key is missing or URL format is invalid
- `RuntimeError`: When scraping operation fails (network issues, API errors)
- `RateLimitError`: When API rate limits are exceeded
## Best Practices
1. Always validate URLs before making requests
2. Implement proper error handling as shown in examples
3. Consider caching results for frequently accessed pages
4. Monitor your API usage through the Scrapegraph dashboard
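Putting the rate-limiting and error-handling guidance above together, a minimal batch-scraping sketch (the URL list and one-second delay are illustrative choices, not API requirements):
```python
import time

from crewai_tools import ScrapegraphScrapeTool

tool = ScrapegraphScrapeTool(api_key="your_api_key")  # or set SCRAPEGRAPH_API_KEY

urls = ["https://www.example.com/a", "https://www.example.com/b"]  # illustrative URLs
results = {}

for url in urls:
    try:
        results[url] = tool.run(
            website_url=url,
            user_prompt="Extract the main heading and summary",
        )
    except RuntimeError as e:
        # Network issues and API errors surface as RuntimeError (see Error Handling above)
        results[url] = f"failed: {e}"
    time.sleep(1)  # simple spacing between requests to respect plan rate limits
```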

View File

@@ -0,0 +1,147 @@
import os
from typing import Any, Optional, Type
from urllib.parse import urlparse
from crewai.tools import BaseTool
from pydantic import BaseModel, Field, validator
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger
class ScrapegraphError(Exception):
"""Base exception for Scrapegraph-related errors"""
pass
class RateLimitError(ScrapegraphError):
"""Raised when API rate limits are exceeded"""
pass
class FixedScrapegraphScrapeToolSchema(BaseModel):
"""Input for ScrapegraphScrapeTool when website_url is fixed."""
pass
class ScrapegraphScrapeToolSchema(FixedScrapegraphScrapeToolSchema):
"""Input for ScrapegraphScrapeTool."""
website_url: str = Field(..., description="Mandatory website url to scrape")
user_prompt: str = Field(
default="Extract the main content of the webpage",
description="Prompt to guide the extraction of content",
)
@validator('website_url')
def validate_url(cls, v):
"""Validate URL format"""
try:
result = urlparse(v)
if not all([result.scheme, result.netloc]):
raise ValueError
return v
except Exception:
raise ValueError("Invalid URL format. URL must include scheme (http/https) and domain")
class ScrapegraphScrapeTool(BaseTool):
"""
A tool that uses Scrapegraph AI to intelligently scrape website content.
Raises:
ValueError: If API key is missing or URL format is invalid
RateLimitError: If API rate limits are exceeded
RuntimeError: If scraping operation fails
"""
name: str = "Scrapegraph website scraper"
description: str = "A tool that uses Scrapegraph AI to intelligently scrape website content."
args_schema: Type[BaseModel] = ScrapegraphScrapeToolSchema
website_url: Optional[str] = None
user_prompt: Optional[str] = None
api_key: Optional[str] = None
def __init__(
self,
website_url: Optional[str] = None,
user_prompt: Optional[str] = None,
api_key: Optional[str] = None,
**kwargs,
):
super().__init__(**kwargs)
self.api_key = api_key or os.getenv("SCRAPEGRAPH_API_KEY")
if not self.api_key:
raise ValueError("Scrapegraph API key is required")
if website_url is not None:
self._validate_url(website_url)
self.website_url = website_url
self.description = f"A tool that uses Scrapegraph AI to intelligently scrape {website_url}'s content."
self.args_schema = FixedScrapegraphScrapeToolSchema
if user_prompt is not None:
self.user_prompt = user_prompt
# Configure logging
sgai_logger.set_logging(level="INFO")
@staticmethod
def _validate_url(url: str) -> None:
"""Validate URL format"""
try:
result = urlparse(url)
if not all([result.scheme, result.netloc]):
raise ValueError
except Exception:
raise ValueError("Invalid URL format. URL must include scheme (http/https) and domain")
def _handle_api_response(self, response: dict) -> str:
"""Handle and validate API response"""
if not response:
raise RuntimeError("Empty response from Scrapegraph API")
if "error" in response:
error_msg = response.get("error", {}).get("message", "Unknown error")
if "rate limit" in error_msg.lower():
raise RateLimitError(f"Rate limit exceeded: {error_msg}")
raise RuntimeError(f"API error: {error_msg}")
if "result" not in response:
raise RuntimeError("Invalid response format from Scrapegraph API")
return response["result"]
def _run(
self,
**kwargs: Any,
) -> Any:
website_url = kwargs.get("website_url", self.website_url)
user_prompt = kwargs.get("user_prompt", self.user_prompt) or "Extract the main content of the webpage"
if not website_url:
raise ValueError("website_url is required")
# Validate URL format
self._validate_url(website_url)
# Initialize the client
sgai_client = Client(api_key=self.api_key)
try:
# Make the SmartScraper request
response = sgai_client.smartscraper(
website_url=website_url,
user_prompt=user_prompt,
)
# Handle and validate the response
return self._handle_api_response(response)
except RateLimitError:
raise # Re-raise rate limit errors
except Exception as e:
raise RuntimeError(f"Scraping failed: {str(e)}")
finally:
# Always close the client
sgai_client.close()

View File

@@ -1,10 +1,9 @@
import logging
from typing import Any, Dict, Literal, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
logger = logging.getLogger(__file__)

View File

@@ -24,6 +24,16 @@ tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.mai
# Example 4: Scrape using optional parameters for customized scraping
tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.main-content', cookie={'name': 'user', 'value': 'John Doe'})
# Example 5: Scrape content in HTML format
tool = SeleniumScrapingTool(website_url='https://example.com', return_html=True)
result = tool._run()
# Returns HTML content like: ['<div class="content">Hello World</div>', '<div class="footer">Copyright 2024</div>']
# Example 6: Scrape content in text format (default)
tool = SeleniumScrapingTool(website_url='https://example.com', return_html=False)
result = tool._run()
# Returns text content like: ['Hello World', 'Copyright 2024']
```
## Arguments
@@ -31,3 +41,4 @@ tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.mai
- `css_element`: Mandatory. The CSS selector for a specific element to scrape from the website.
- `cookie`: Optional. A dictionary containing cookie information. This parameter allows the tool to simulate a session with cookie information, providing access to content that may be restricted to logged-in users.
- `wait_time`: Optional. The number of seconds the tool waits after loading the website and after setting a cookie, before scraping the content. This allows for dynamic content to load properly.
- `return_html`: Optional. If True, the tool returns HTML content. If False, the tool returns text content.

View File

@@ -1,30 +1,51 @@
import re
import time
from typing import Any, Optional, Type
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from pydantic import BaseModel, Field
from crewai.tools import BaseTool
from pydantic import BaseModel, Field, validator
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from ..base_tool import BaseTool
class FixedSeleniumScrapingToolSchema(BaseModel):
"""Input for SeleniumScrapingTool."""
pass
class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema):
"""Input for SeleniumScrapingTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
website_url: str = Field(..., description="Mandatory website url to read the file. Must start with http:// or https://")
css_element: str = Field(
...,
description="Mandatory css reference for element to scrape from the website",
)
@validator('website_url')
def validate_website_url(cls, v):
if not v:
raise ValueError("Website URL cannot be empty")
if len(v) > 2048: # Common maximum URL length
raise ValueError("URL is too long (max 2048 characters)")
if not re.match(r'^https?://', v):
raise ValueError("URL must start with http:// or https://")
try:
result = urlparse(v)
if not all([result.scheme, result.netloc]):
raise ValueError("Invalid URL format")
except Exception as e:
raise ValueError(f"Invalid URL: {str(e)}")
if re.search(r'\s', v):
raise ValueError("URL cannot contain whitespace")
return v
class SeleniumScrapingTool(BaseTool):
name: str = "Read a website content"
@@ -35,6 +56,7 @@ class SeleniumScrapingTool(BaseTool):
cookie: Optional[dict] = None
wait_time: Optional[int] = 3
css_element: Optional[str] = None
return_html: Optional[bool] = False
def __init__(
self,
@@ -65,19 +87,54 @@ class SeleniumScrapingTool(BaseTool):
) -> Any:
website_url = kwargs.get("website_url", self.website_url)
css_element = kwargs.get("css_element", self.css_element)
return_html = kwargs.get("return_html", self.return_html)
driver = self._create_driver(website_url, self.cookie, self.wait_time)
content = []
if css_element is None or css_element.strip() == "":
body_text = driver.find_element(By.TAG_NAME, "body").text
content.append(body_text)
else:
for element in driver.find_elements(By.CSS_SELECTOR, css_element):
content.append(element.text)
content = self._get_content(driver, css_element, return_html)
driver.close()
return "\n".join(content)
def _get_content(self, driver, css_element, return_html):
content = []
if self._is_css_element_empty(css_element):
content.append(self._get_body_content(driver, return_html))
else:
content.extend(self._get_elements_content(driver, css_element, return_html))
return content
def _is_css_element_empty(self, css_element):
return css_element is None or css_element.strip() == ""
def _get_body_content(self, driver, return_html):
body_element = driver.find_element(By.TAG_NAME, "body")
return (
body_element.get_attribute("outerHTML")
if return_html
else body_element.text
)
def _get_elements_content(self, driver, css_element, return_html):
elements_content = []
for element in driver.find_elements(By.CSS_SELECTOR, css_element):
elements_content.append(
element.get_attribute("outerHTML") if return_html else element.text
)
return elements_content
def _create_driver(self, url, cookie, wait_time):
if not url:
raise ValueError("URL cannot be empty")
# Validate URL format
if not re.match(r'^https?://', url):
raise ValueError("URL must start with http:// or https://")
options = Options()
options.add_argument("--headless")
driver = self.driver(options=options)

View File

@@ -0,0 +1,32 @@
# SerpApi Tools
## Description
[SerpApi](https://serpapi.com/) tools are built for searching for information on the internet. They currently support:
- Google Search
- Google Shopping
To use the SerpApi tools, you must have `SERPAPI_API_KEY` set in the environment. To get an API key, register for a free account at [SerpApi](https://serpapi.com/).
## Installation
To start using the SerpApi Tools, you must first install the `crewai_tools` package. This can be easily done with the following command:
```shell
pip install 'crewai[tools]'
```
## Examples
The following examples demonstrate how to initialize each tool:
### Google Search
```python
from crewai_tools import SerpApiGoogleSearchTool
tool = SerpApiGoogleSearchTool()
```
### Google Shopping
```python
from crewai_tools import SerpApiGoogleShoppingTool
tool = SerpApiGoogleShoppingTool()
```
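Either tool can then be invoked directly or handed to an agent. A hedged sketch, assuming `run` forwards the `search_query` and `location` arguments defined in the tools' schemas; the agent fields are placeholder values:
```python
from crewai import Agent
from crewai_tools import SerpApiGoogleSearchTool

search_tool = SerpApiGoogleSearchTool()  # requires SERPAPI_API_KEY in the environment

# Direct invocation
results = search_tool.run(search_query="best electric bikes 2025", location="Austin, Texas")

# Attached to an agent
researcher = Agent(
    role="Market Researcher",
    goal="Find and compare products online",
    backstory="A researcher who relies on Google Search and Shopping results.",
    tools=[search_tool],
)
```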

View File

@@ -0,0 +1,38 @@
import os
import re
from typing import Optional, Any, Union
from crewai.tools import BaseTool
class SerpApiBaseTool(BaseTool):
"""Base class for SerpApi functionality with shared capabilities."""
client: Optional[Any] = None
def __init__(self, **kwargs):
super().__init__(**kwargs)
try:
from serpapi import Client
except ImportError:
raise ImportError(
"`serpapi` package not found, please install with `pip install serpapi`"
)
api_key = os.getenv("SERPAPI_API_KEY")
if not api_key:
raise ValueError(
"Missing API key, you can get the key from https://serpapi.com/manage-api-key"
)
self.client = Client(api_key=api_key)
def _omit_fields(self, data: Union[dict, list], omit_patterns: list[str]) -> None:
if isinstance(data, dict):
for field in list(data.keys()):
if any(re.compile(p).match(field) for p in omit_patterns):
data.pop(field, None)
else:
if isinstance(data[field], (dict, list)):
self._omit_fields(data[field], omit_patterns)
elif isinstance(data, list):
for item in data:
self._omit_fields(item, omit_patterns)

View File

@@ -0,0 +1,40 @@
from typing import Any, Type, Optional
import re
from pydantic import BaseModel, Field
from .serpapi_base_tool import SerpApiBaseTool
from serpapi import HTTPError
class SerpApiGoogleSearchToolSchema(BaseModel):
"""Input for Google Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to Google search.")
location: Optional[str] = Field(None, description="Location you want the search to be performed in.")
class SerpApiGoogleSearchTool(SerpApiBaseTool):
name: str = "Google Search"
description: str = (
"A tool to perform to perform a Google search with a search_query."
)
args_schema: Type[BaseModel] = SerpApiGoogleSearchToolSchema
def _run(
self,
**kwargs: Any,
) -> Any:
try:
results = self.client.search({
"q": kwargs.get("search_query"),
"location": kwargs.get("location"),
}).as_dict()
self._omit_fields(
results,
[r"search_metadata", r"search_parameters", r"serpapi_.+", r".+_token", r"displayed_link", r"pagination"]
)
return results
except HTTPError as e:
return f"An error occurred: {str(e)}. Some parameters may be invalid."

View File

@@ -0,0 +1,42 @@
from typing import Any, Type, Optional
import re
from pydantic import BaseModel, Field
from .serpapi_base_tool import SerpApiBaseTool
from serpapi import HTTPError
class SerpApiGoogleShoppingToolSchema(BaseModel):
"""Input for Google Shopping."""
search_query: str = Field(..., description="Mandatory search query you want to use to Google shopping.")
location: Optional[str] = Field(None, description="Location you want the search to be performed in.")
class SerpApiGoogleShoppingTool(SerpApiBaseTool):
name: str = "Google Shopping"
description: str = (
"A tool to perform search on Google shopping with a search_query."
)
args_schema: Type[BaseModel] = SerpApiGoogleShoppingToolSchema
def _run(
self,
**kwargs: Any,
) -> Any:
try:
results = self.client.search({
"engine": "google_shopping",
"q": kwargs.get("search_query"),
"location": kwargs.get("location")
}).as_dict()
self._omit_fields(
results,
[r"search_metadata", r"search_parameters", r"serpapi_.+", r"filters", r"pagination"]
)
return results
except HTTPError as e:
return f"An error occurred: {str(e)}. Some parameters may be invalid."

View File

@@ -1,30 +1,49 @@
# SerperDevTool Documentation
## Description
This tool is designed to perform a semantic search for a specified query from a text's content across the internet. It utilizes the `serper.dev` API to fetch and display the most relevant search results based on the query provided by the user.
The SerperDevTool is a powerful search tool that interfaces with the `serper.dev` API to perform internet searches. It supports multiple search types including general search and news search, with features like knowledge graph integration, organic results, "People Also Ask" questions, and related searches.
## Features
- Multiple search types: 'search' (default) and 'news'
- Knowledge graph integration for enhanced search context
- Organic search results with sitelinks
- "People Also Ask" questions and answers
- Related searches suggestions
- News search with date, source, and image information
- Configurable number of results
- Optional result saving to file
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:
## Usage
```python
from crewai_tools import SerperDevTool
# Initialize the tool for internet searching capabilities
tool = SerperDevTool()
# Initialize the tool
tool = SerperDevTool(
n_results=10, # Optional: Number of results to return (default: 10)
save_file=False, # Optional: Save results to file (default: False)
search_type="search" # Optional: Type of search - "search" or "news" (default: "search")
)
# Execute a search
results = tool._run(search_query="your search query")
```
## Steps to Get Started
To effectively use the `SerperDevTool`, follow these steps:
## Configuration
1. **API Key Setup**:
- Sign up for an account at `serper.dev`
- Obtain your API key
- Set the environment variable: `SERPER_API_KEY`
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire a `serper.dev` API key by registering for a free account at `serper.dev`.
3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPER_API_KEY` to facilitate its use by the tool.
## Conclusion
By integrating the `SerperDevTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.
## Response Format
The tool returns structured data including:
- Search parameters
- Knowledge graph data (for general search)
- Organic search results
- "People Also Ask" questions
- Related searches
- News results (for news search type)
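For example, a news search only requires switching `search_type`. The sketch below assumes `SERPER_API_KEY` is set in the environment, the query string is illustrative, and the keys it reads follow the response format listed above.
```python
from crewai_tools import SerperDevTool

# Sketch of a news search; assumes SERPER_API_KEY is set in the environment.
news_tool = SerperDevTool(search_type="news", n_results=5)
results = news_tool._run(search_query="open source AI agents")

# Each news item carries title, link, snippet, date, source, and imageUrl.
for item in results.get("news", []):
    print(f"{item['title']} ({item.get('source', '')}, {item.get('date', '')})")
```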

View File

@@ -1,20 +1,29 @@
import datetime
import json
import os
from typing import Any, Optional, Type
import logging
from typing import Any, Type
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
def _save_results_to_file(content: str) -> None:
"""Saves the search results to a file."""
filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
with open(filename, "w") as file:
file.write(content)
print(f"Results saved to {filename}")
try:
filename = f"search_results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
with open(filename, "w") as file:
file.write(content)
logger.info(f"Results saved to {filename}")
except IOError as e:
logger.error(f"Failed to save results to file: {e}")
raise
class SerperDevToolSchema(BaseModel):
@@ -28,67 +37,199 @@ class SerperDevToolSchema(BaseModel):
class SerperDevTool(BaseTool):
name: str = "Search the internet"
description: str = (
"A tool that can be used to search the internet with a search_query."
"A tool that can be used to search the internet with a search_query. "
"Supports different search types: 'search' (default), 'news'"
)
args_schema: Type[BaseModel] = SerperDevToolSchema
search_url: str = "https://google.serper.dev/search"
country: Optional[str] = ""
location: Optional[str] = ""
locale: Optional[str] = ""
base_url: str = "https://google.serper.dev"
n_results: int = 10
save_file: bool = False
search_type: str = "search"
def _run(
self,
**kwargs: Any,
) -> Any:
def _get_search_url(self, search_type: str) -> str:
"""Get the appropriate endpoint URL based on search type."""
search_type = search_type.lower()
allowed_search_types = ["search", "news"]
if search_type not in allowed_search_types:
raise ValueError(
f"Invalid search type: {search_type}. Must be one of: {', '.join(allowed_search_types)}"
)
return f"{self.base_url}/{search_type}"
search_query = kwargs.get("search_query") or kwargs.get("query")
save_file = kwargs.get("save_file", self.save_file)
n_results = kwargs.get("n_results", self.n_results)
def _process_knowledge_graph(self, kg: dict) -> dict:
"""Process knowledge graph data from search results."""
return {
"title": kg.get("title", ""),
"type": kg.get("type", ""),
"website": kg.get("website", ""),
"imageUrl": kg.get("imageUrl", ""),
"description": kg.get("description", ""),
"descriptionSource": kg.get("descriptionSource", ""),
"descriptionLink": kg.get("descriptionLink", ""),
"attributes": kg.get("attributes", {}),
}
payload = {"q": search_query, "num": n_results}
def _process_organic_results(self, organic_results: list) -> list:
"""Process organic search results."""
processed_results = []
for result in organic_results[: self.n_results]:
try:
result_data = {
"title": result["title"],
"link": result["link"],
"snippet": result.get("snippet", ""),
"position": result.get("position"),
}
if self.country != "":
payload["gl"] = self.country
if self.location != "":
payload["location"] = self.location
if self.locale != "":
payload["hl"] = self.locale
if "sitelinks" in result:
result_data["sitelinks"] = [
{
"title": sitelink.get("title", ""),
"link": sitelink.get("link", ""),
}
for sitelink in result["sitelinks"]
]
payload = json.dumps(payload)
processed_results.append(result_data)
except KeyError:
logger.warning(f"Skipping malformed organic result: {result}")
continue
return processed_results
def _process_people_also_ask(self, paa_results: list) -> list:
"""Process 'People Also Ask' results."""
processed_results = []
for result in paa_results[: self.n_results]:
try:
result_data = {
"question": result["question"],
"snippet": result.get("snippet", ""),
"title": result.get("title", ""),
"link": result.get("link", ""),
}
processed_results.append(result_data)
except KeyError:
logger.warning(f"Skipping malformed PAA result: {result}")
continue
return processed_results
def _process_related_searches(self, related_results: list) -> list:
"""Process related search results."""
processed_results = []
for result in related_results[: self.n_results]:
try:
processed_results.append({"query": result["query"]})
except KeyError:
logger.warning(f"Skipping malformed related search result: {result}")
continue
return processed_results
def _process_news_results(self, news_results: list) -> list:
"""Process news search results."""
processed_results = []
for result in news_results[: self.n_results]:
try:
result_data = {
"title": result["title"],
"link": result["link"],
"snippet": result.get("snippet", ""),
"date": result.get("date", ""),
"source": result.get("source", ""),
"imageUrl": result.get("imageUrl", ""),
}
processed_results.append(result_data)
except KeyError:
logger.warning(f"Skipping malformed news result: {result}")
continue
return processed_results
def _make_api_request(self, search_query: str, search_type: str) -> dict:
"""Make API request to Serper."""
search_url = self._get_search_url(search_type)
payload = json.dumps({"q": search_query, "num": self.n_results})
headers = {
"X-API-KEY": os.environ["SERPER_API_KEY"],
"content-type": "application/json",
}
response = requests.request(
"POST", self.search_url, headers=headers, data=payload
)
results = response.json()
if "organic" in results:
results = results["organic"][: self.n_results]
string = []
for result in results:
try:
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Snippet: {result['snippet']}",
"---",
]
)
)
except KeyError:
continue
content = "\n".join(string)
if save_file:
_save_results_to_file(content)
return f"\nSearch results: {content}\n"
else:
response = None
try:
response = requests.post(
search_url, headers=headers, json=json.loads(payload), timeout=10
)
response.raise_for_status()
results = response.json()
if not results:
logger.error("Empty response from Serper API")
raise ValueError("Empty response from Serper API")
return results
except requests.exceptions.RequestException as e:
error_msg = f"Error making request to Serper API: {e}"
if response is not None and hasattr(response, "content"):
error_msg += f"\nResponse content: {response.content}"
logger.error(error_msg)
raise
except json.JSONDecodeError as e:
if response is not None and hasattr(response, "content"):
logger.error(f"Error decoding JSON response: {e}")
logger.error(f"Response content: {response.content}")
else:
logger.error(
f"Error decoding JSON response: {e} (No response content available)"
)
raise
def _process_search_results(self, results: dict, search_type: str) -> dict:
"""Process search results based on search type."""
formatted_results = {}
if search_type == "search":
if "knowledgeGraph" in results:
formatted_results["knowledgeGraph"] = self._process_knowledge_graph(
results["knowledgeGraph"]
)
if "organic" in results:
formatted_results["organic"] = self._process_organic_results(
results["organic"]
)
if "peopleAlsoAsk" in results:
formatted_results["peopleAlsoAsk"] = self._process_people_also_ask(
results["peopleAlsoAsk"]
)
if "relatedSearches" in results:
formatted_results["relatedSearches"] = self._process_related_searches(
results["relatedSearches"]
)
elif search_type == "news":
if "news" in results:
formatted_results["news"] = self._process_news_results(results["news"])
return formatted_results
def _run(self, **kwargs: Any) -> Any:
"""Execute the search operation."""
search_query = kwargs.get("search_query") or kwargs.get("query")
search_type = kwargs.get("search_type", self.search_type)
save_file = kwargs.get("save_file", self.save_file)
results = self._make_api_request(search_query, search_type)
formatted_results = {
"searchParameters": {
"q": search_query,
"type": search_type,
**results.get("searchParameters", {}),
}
}
formatted_results.update(self._process_search_results(results, search_type))
formatted_results["credits"] = results.get("credits", 1)
if save_file:
_save_results_to_file(json.dumps(formatted_results, indent=2))
return formatted_results

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
from urllib.parse import urlencode
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
from urllib.parse import urlencode
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
from urllib.parse import urlencode
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search."""

View File

@@ -1,81 +1,87 @@
# SpiderTool
## Description
[Spider](https://spider.cloud/?ref=crewai) is the [fastest](https://github.com/spider-rs/spider/blob/main/benches/BENCHMARKS.md#benchmark-results) open source scraper and crawler that returns LLM-ready data. It converts any website into pure HTML, markdown, metadata or text while enabling you to crawl with custom actions using AI.
[Spider](https://spider.cloud/?ref=crewai) is a high-performance web scraping and crawling tool that delivers optimized markdown for LLMs and AI agents. It intelligently switches between HTTP requests and JavaScript rendering based on page requirements, and it handles both single-page scraping and full website crawling, making it ideal for content extraction and data collection.
## Installation
To use the Spider API you need to download the [Spider SDK](https://pypi.org/project/spider-client/) and the crewai[tools] SDK too:
To use the Spider API you need to download the [Spider SDK](https://pypi.org/project/spider-client/) and the crewai[tools] SDK, too:
```shell
pip install spider-client 'crewai[tools]'
```
## Example
This example shows you how you can use the Spider tool to enable your agent to scrape and crawl websites. The data returned from the Spider API is already LLM-ready, so no need to do any cleaning there.
This example shows you how you can use the Spider tool to enable your agent to scrape and crawl websites. The data returned from the Spider API is LLM-ready.
```python
from crewai_tools import SpiderTool
def main():
spider_tool = SpiderTool()
searcher = Agent(
role="Web Research Expert",
goal="Find related information from specific URL's",
backstory="An expert web researcher that uses the web extremely well",
tools=[spider_tool],
verbose=True,
)
# To enable scraping any website it finds during its execution
spider_tool = SpiderTool(api_key='YOUR_API_KEY')
return_metadata = Task(
description="Scrape https://spider.cloud with a limit of 1 and enable metadata",
expected_output="Metadata and 10 word summary of spider.cloud",
agent=searcher
)
# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
spider_tool = SpiderTool(website_url='https://spider.cloud')
crew = Crew(
agents=[searcher],
tasks=[
return_metadata,
],
verbose=2
)
crew.kickoff()
# Pass in custom parameters, see below for more details
spider_tool = SpiderTool(
website_url='https://spider.cloud',
custom_params={"depth": 2, "anti_bot": True, "proxy_enabled": True}
)
if __name__ == "__main__":
main()
# Advanced usage using css query selector to extract content
css_extraction_map = {
"/": [ # pass in path (main index in this case)
{
"name": "headers", # give it a name for this element
"selectors": [
"h1"
]
}
]
}
spider_tool = SpiderTool(
website_url='https://spider.cloud',
custom_params={"anti_bot": True, "proxy_enabled": True, "metadata": True, "css_extraction_map": css_extraction_map}
)
### Response (extracted text will be in the metadata)
"css_extracted": {
"headers": [
"The Web Crawler for AI Agents and LLMs!"
]
}
```
## Agent setup
```yaml
researcher:
role: >
You're a researcher tasked with researching a website and its content (use crawl mode). The website to crawl is: {website_url}.
```
## Arguments
- `api_key` (string, optional): Specifies Spider API key. If not specified, it looks for `SPIDER_API_KEY` in environment variables.
- `params` (object, optional): Optional parameters for the request. Defaults to `{"return_format": "markdown"}` to return the website's content in a format that fits LLMs better.
- `website_url` (string): The website URL. Will be used as a fallback if passed when the tool is initialized.
- `log_failures` (bool): Log scrape failures or fail silently. Defaults to `true`.
- `custom_params` (object, optional): Optional parameters for the request.
- `return_format` (string): The return format of the website's content. Defaults to `markdown`.
- `request` (string): The request type to perform. Possible values are `http`, `chrome`, and `smart`. Use `smart` to perform an HTTP request by default until JavaScript rendering is needed for the HTML.
- `limit` (int): The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.
- `depth` (int): The crawl limit for maximum depth. If `0`, no limit will be applied.
- `cache` (bool): Use HTTP caching for the crawl to speed up repeated runs. Default is `true`.
- `budget` (object): An object mapping paths to a counter that limits the number of pages crawled, for example `{"*": 1}` to crawl only the root page.
- `locale` (string): The locale to use for request, example `en-US`.
- `cookies` (string): Add HTTP cookies to use for request.
- `stealth` (bool): Use stealth mode for headless chrome request to help prevent being blocked. The default is `true` on chrome.
- `headers` (object): Forward HTTP headers to use for all requests. The object is expected to be a map of key-value pairs.
- `metadata` (bool): Boolean to store metadata about the pages and content found. This could help improve AI interoperability. Defaults to `false` unless you have the website already stored with the configuration enabled.
- `viewport` (object): Configure the viewport for chrome. Defaults to `800x600`.
- `encoding` (string): The type of encoding to use, such as `UTF-8` or `SHIFT_JIS`.
- `metadata` (bool): Boolean to store metadata about the pages and content found. Defaults to `false`.
- `subdomains` (bool): Allow subdomains to be included. Default is `false`.
- `user_agent` (string): Add a custom HTTP user agent to the request. By default this is set to a random agent.
- `store_data` (bool): Boolean to determine if storage should be used. If set this takes precedence over `storageless`. Defaults to `false`.
- `gpt_config` (object): Use AI to generate actions to perform during the crawl. You can pass an array for the `"prompt"` to chain steps.
- `fingerprint` (bool): Use advanced fingerprint for chrome.
- `storageless` (bool): Boolean to prevent storing any type of data for the request including storage and AI vectors embedding. Defaults to `false` unless you have the website already stored.
- `readability` (bool): Use [readability](https://github.com/mozilla/readability) to pre-process the content for reading. This may drastically improve the content for LLM usage.
- `return_format` (string): The format to return the data in. Possible values are `markdown`, `raw`, `text`, and `html2text`. Use `raw` to return the page in its default format, such as HTML.
- `proxy_enabled` (bool): Enable high performance premium proxies for the request to prevent being blocked at the network level.
- `query_selector` (string): The CSS query selector to use when extracting content from the markup.
- `full_resources` (bool): Crawl and download all the resources for a website.
- `css_extraction_map` (object): Use CSS or XPath selectors to scrape contents from the web page. Set the paths and the extraction object map to perform extractions per path or page.
- `request_timeout` (int): The timeout to use for the request, in seconds. Allowed values range from `5` to `60`; the default is `30` seconds.
- `run_in_background` (bool): Run the request in the background. Useful if storing data and wanting to trigger crawls to the dashboard. This has no effect if storageless is set.
- `return_headers` (bool): Return the HTTP response headers with the results. Defaults to `false`.
- `filter_output_main_only` (bool): Filter the nav, aside, and footer from the output.
- `headers` (object): Forward HTTP headers to use for all requests. The object is expected to be a map of key-value pairs.
Learn other parameters that can be used: [https://spider.cloud/docs/api](https://spider.cloud/docs/api)
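As a sketch of how these options combine (all values below are illustrative), `custom_params` accepts any of the parameters above and is applied to every request the tool makes:
```python
from crewai_tools import SpiderTool

# Illustrative values only; see the parameter list above for what each option does.
spider_tool = SpiderTool(
    website_url="https://spider.cloud",
    custom_params={
        "return_format": "markdown",  # LLM-friendly output
        "request": "smart",           # HTTP first, JavaScript rendering only when needed
        "limit": 5,                   # maximum pages per crawl
        "metadata": True,             # include page metadata
        "readability": True,          # pre-process content for reading
    },
)
```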

View File

@@ -1,61 +1,202 @@
import logging
from typing import Any, Dict, Literal, Optional, Type
from urllib.parse import unquote, urlparse
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
logger = logging.getLogger(__file__)
class SpiderToolSchema(BaseModel):
url: str = Field(description="Website URL")
params: Optional[Dict[str, Any]] = Field(
description="Set additional params. Options include:\n"
"- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
"- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
"- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
"- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
"""Input schema for SpiderTool."""
website_url: str = Field(
..., description="Mandatory website URL to scrape or crawl"
)
mode: Literal["scrape", "crawl"] = Field(
default="scrape",
description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set.",
description="The mode of the SpiderTool. The only two allowed modes are `scrape` or `crawl`. Crawl mode will follow up to 5 links and return their content in markdown format.",
)
class SpiderTool(BaseTool):
name: str = "Spider scrape & crawl tool"
description: str = "Scrape & Crawl any url and return LLM-ready data."
args_schema: Type[BaseModel] = SpiderToolSchema
api_key: Optional[str] = None
spider: Optional[Any] = None
class SpiderToolConfig(BaseModel):
"""Configuration settings for SpiderTool.
Contains all default values and constants used by SpiderTool.
Centralizes configuration management for easier maintenance.
"""
# Crawling settings
DEFAULT_CRAWL_LIMIT: int = 5
DEFAULT_RETURN_FORMAT: str = "markdown"
# Request parameters
DEFAULT_REQUEST_MODE: str = "smart"
FILTER_SVG: bool = True
class SpiderTool(BaseTool):
"""Tool for scraping and crawling websites.
This tool provides functionality to either scrape a single webpage or crawl multiple
pages, returning content in a format suitable for LLM processing.
"""
name: str = "SpiderTool"
description: str = (
"A tool to scrape or crawl a website and return LLM-ready content."
)
args_schema: Type[BaseModel] = SpiderToolSchema
custom_params: Optional[Dict[str, Any]] = None
website_url: Optional[str] = None
api_key: Optional[str] = None
spider: Any = None
log_failures: bool = True
config: SpiderToolConfig = SpiderToolConfig()
def __init__(
self,
api_key: Optional[str] = None,
website_url: Optional[str] = None,
custom_params: Optional[Dict[str, Any]] = None,
log_failures: bool = True,
**kwargs,
):
"""Initialize SpiderTool for web scraping and crawling.
Args:
api_key (Optional[str]): Spider API key for authentication. Required for production use.
website_url (Optional[str]): Default website URL to scrape/crawl. Can be overridden during execution.
custom_params (Optional[Dict[str, Any]]): Additional parameters to pass to Spider API.
These override any parameters set by the LLM.
log_failures (bool): If True, logs errors. Defaults to True.
**kwargs: Additional arguments passed to BaseTool.
Raises:
ImportError: If spider-client package is not installed.
RuntimeError: If Spider client initialization fails.
"""
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if website_url is not None:
self.website_url = website_url
self.log_failures = log_failures
self.custom_params = custom_params
try:
from spider import Spider # type: ignore
self.spider = Spider(api_key=api_key)
except ImportError:
raise ImportError(
"`spider-client` package not found, please run `pip install spider-client`"
"`spider-client` package not found, please run `uv add spider-client`"
)
except Exception as e:
raise RuntimeError(f"Failed to initialize Spider client: {str(e)}")
self.spider = Spider(api_key=api_key)
def _validate_url(self, url: str) -> bool:
"""Validate URL format and security constraints.
Args:
url (str): URL to validate. Must be a properly formatted HTTP(S) URL
Returns:
bool: True if URL is valid and meets security requirements, False otherwise.
"""
try:
url = url.strip()
decoded_url = unquote(url)
result = urlparse(decoded_url)
if not all([result.scheme, result.netloc]):
return False
if result.scheme not in ["http", "https"]:
return False
return True
except Exception:
return False
def _run(
self,
url: str,
params: Optional[Dict[str, Any]] = None,
mode: Optional[Literal["scrape", "crawl"]] = "scrape",
):
if mode not in ["scrape", "crawl"]:
raise ValueError(
"Unknown mode in `mode` parameter, `scrape` or `crawl` are the allowed modes"
website_url: str,
mode: Literal["scrape", "crawl"] = "scrape",
) -> Optional[str]:
"""Execute the spider tool to scrape or crawl the specified website.
Args:
website_url (str): The URL to process. Must be a valid HTTP(S) URL.
mode (Literal["scrape", "crawl"]): Operation mode.
- "scrape": Extract content from single page
- "crawl": Follow links and extract content from multiple pages
Returns:
Optional[str]: Extracted content in markdown format, or None if extraction fails
and log_failures is True.
Raises:
ValueError: If URL is invalid or missing, or if mode is invalid.
ImportError: If spider-client package is not properly installed.
ConnectionError: If network connection fails while accessing the URL.
Exception: For other runtime errors.
"""
try:
params = {}
url = website_url or self.website_url
if not url:
raise ValueError(
"Website URL must be provided either during initialization or execution"
)
if not self._validate_url(url):
raise ValueError(f"Invalid URL format: {url}")
if mode not in ["scrape", "crawl"]:
raise ValueError(
f"Invalid mode: {mode}. Must be either 'scrape' or 'crawl'"
)
params = {
"request": self.config.DEFAULT_REQUEST_MODE,
"filter_output_svg": self.config.FILTER_SVG,
"return_format": self.config.DEFAULT_RETURN_FORMAT,
}
if mode == "crawl":
params["limit"] = self.config.DEFAULT_CRAWL_LIMIT
if self.custom_params:
params.update(self.custom_params)
action = (
self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
)
return action(url=url, params=params)
# Ensure 'return_format': 'markdown' is always included
if params:
params["return_format"] = "markdown"
else:
params = {"return_format": "markdown"}
except ValueError as ve:
if self.log_failures:
logger.error(f"Validation error for URL {url}: {str(ve)}")
return None
raise ve
action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
spider_docs = action(url=url, params=params)
except ImportError as ie:
logger.error(f"Spider client import error: {str(ie)}")
raise ie
return spider_docs
except ConnectionError as ce:
if self.log_failures:
logger.error(f"Connection error while accessing {url}: {str(ce)}")
return None
raise ce
except Exception as e:
if self.log_failures:
logger.error(
f"Unexpected error during {mode} operation on {url}: {str(e)}"
)
return None
raise e

View File

@@ -1,12 +1,10 @@
import base64
from typing import Type, Optional
from pathlib import Path
from crewai.tools import BaseTool
from openai import OpenAI
from pydantic import BaseModel, validator
class ImagePromptSchema(BaseModel):
"""Input for Vision Tool."""
image_path_url: str = "The image path or URL."

View File

@@ -0,0 +1,80 @@
# WeaviateVectorSearchTool
## Description
This tool is designed for semantic search over documents stored in a Weaviate vector database. Use it to find documents that are semantically similar to a given query.
Weaviate is a vector database that is used to store and query vector embeddings. You can follow their docs here: https://weaviate.io/developers/wcs/connect
## Installation
Install the crewai_tools package by executing the following command in your terminal:
```shell
uv pip install 'crewai[tools]'
```
## Example
To utilize the WeaviateVectorSearchTool for different use cases, follow these examples:
```python
from crewai import Agent
from crewai_tools import WeaviateVectorSearchTool
from weaviate.classes.config import Configure  # required for the custom vectorizer/generative example below

# Initialize the tool with your Weaviate cluster details
tool = WeaviateVectorSearchTool(
collection_name='example_collections',
limit=3,
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
weaviate_api_key="your-weaviate-api-key",
)
# or
# Set up a custom vectorizer and generative model
tool = WeaviateVectorSearchTool(
collection_name='example_collections',
limit=3,
vectorizer=Configure.Vectorizer.text2vec_openai(model="nomic-embed-text"),
generative_model=Configure.Generative.openai(model="gpt-4o-mini"),
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
weaviate_api_key="your-weaviate-api-key",
)
# Adding the tool to an agent
rag_agent = Agent(
name="rag_agent",
role="You are a helpful assistant that can answer questions with the help of the WeaviateVectorSearchTool.",
llm="gpt-4o-mini",
tools=[tool],
)
```
## Arguments
- `collection_name` : The name of the collection to search within. (Required)
- `weaviate_cluster_url` : The URL of the Weaviate cluster. (Required)
- `weaviate_api_key` : The API key for the Weaviate cluster. (Required)
- `limit` : The number of results to return. (Optional)
- `vectorizer` : The vectorizer to use. (Optional)
- `generative_model` : The generative model to use. (Optional)
Preloading the Weaviate database with documents:
```python
import os

from crewai_tools import WeaviateVectorSearchTool

# Use before hooks to generate the documents and add them to the Weaviate database.
# `client` below is assumed to be an already-connected Weaviate client; see the Weaviate docs: https://weaviate.io/developers/wcs/connect
test_docs = client.collections.get("example_collections")
docs_to_load = os.listdir("knowledge")
with test_docs.batch.dynamic() as batch:
for d in docs_to_load:
with open(os.path.join("knowledge", d), "r") as f:
content = f.read()
batch.add_object(
{
"content": content,
"year": d.split("_")[0],
}
)
tool = WeaviateVectorSearchTool(collection_name='example_collections', limit=3)
```
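Once the collection contains documents, the tool can be attached to an agent as shown above or invoked directly. This is a minimal sketch; the query text is illustrative.
```python
from crewai_tools import WeaviateVectorSearchTool

# Minimal sketch: assumes OPENAI_API_KEY is set and the collection already holds documents.
tool = WeaviateVectorSearchTool(
    collection_name="example_collections",
    limit=3,
    weaviate_cluster_url="https://your-weaviate-cluster-url.com",
    weaviate_api_key="your-weaviate-api-key",
)
results = tool.run(query="key findings from the annual report")  # illustrative query
print(results)  # JSON-formatted properties of the matching documents
```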

View File

@@ -0,0 +1,105 @@
import json
import os
from typing import Any, Optional, Type
try:
import weaviate
from weaviate.classes.config import Configure, Vectorizers
from weaviate.classes.init import Auth
WEAVIATE_AVAILABLE = True
except ImportError:
WEAVIATE_AVAILABLE = False
weaviate = Any # type placeholder
Configure = Any
Vectorizers = Any
Auth = Any
from pydantic import BaseModel, Field
from crewai.tools import BaseTool
class WeaviateToolSchema(BaseModel):
"""Input for WeaviateTool."""
query: str = Field(
...,
description="The query to search retrieve relevant information from the Weaviate database. Pass only the query, not the question.",
)
class WeaviateVectorSearchTool(BaseTool):
"""Tool to search the Weaviate database"""
name: str = "WeaviateVectorSearchTool"
description: str = "A tool to search the Weaviate database for relevant information on internal documents."
args_schema: Type[BaseModel] = WeaviateToolSchema
query: Optional[str] = None
vectorizer: Optional[Vectorizers] = None
generative_model: Optional[str] = None
collection_name: Optional[str] = None
limit: Optional[int] = Field(default=3)
headers: Optional[dict] = None
weaviate_cluster_url: str = Field(
...,
description="The URL of the Weaviate cluster",
)
weaviate_api_key: str = Field(
...,
description="The API key for the Weaviate cluster",
)
def __init__(self, **kwargs):
super().__init__(**kwargs)
if WEAVIATE_AVAILABLE:
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
raise ValueError(
"OPENAI_API_KEY environment variable is required for WeaviateVectorSearchTool and it is mandatory to use the tool."
)
self.headers = {"X-OpenAI-Api-Key": openai_api_key}
self.vectorizer = self.vectorizer or Configure.Vectorizer.text2vec_openai(
model="nomic-embed-text",
)
self.generative_model = (
self.generative_model
or Configure.Generative.openai(
model="gpt-4o",
)
)
def _run(self, query: str) -> str:
if not WEAVIATE_AVAILABLE:
raise ImportError(
"The 'weaviate-client' package is required to use the WeaviateVectorSearchTool. "
"Please install it with: uv add weaviate-client"
)
if not self.weaviate_cluster_url or not self.weaviate_api_key:
raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set")
client = weaviate.connect_to_weaviate_cloud(
cluster_url=self.weaviate_cluster_url,
auth_credentials=Auth.api_key(self.weaviate_api_key),
headers=self.headers,
)
internal_docs = client.collections.get(self.collection_name)
if not internal_docs:
internal_docs = client.collections.create(
name=self.collection_name,
vectorizer_config=self.vectorizer,
generative_config=self.generative_model,
)
response = internal_docs.query.near_text(
query=query,
limit=self.limit,
)
json_response = ""
for obj in response.objects:
json_response += json.dumps(obj.properties, indent=2)
client.close()
return json_response

View File

@@ -1,5 +1,6 @@
from typing import Callable
from crewai_tools import BaseTool, tool
from crewai.tools import BaseTool, tool
from crewai.tools.base_tool import to_langchain
def test_creating_a_tool_using_annotation():
@tool("Name of my tool")
@@ -9,14 +10,14 @@ def test_creating_a_tool_using_annotation():
# Assert all the right attributes were defined
assert my_tool.name == "Name of my tool"
assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
assert my_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it."
assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
assert my_tool.func("What is the meaning of life?") == "What is the meaning of life?"
# Assert the langchain tool conversion worked as expected
converted_tool = my_tool.to_langchain()
converted_tool = to_langchain([my_tool])[0]
assert converted_tool.name == "Name of my tool"
assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
assert converted_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it."
assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
assert converted_tool.func("What is the meaning of life?") == "What is the meaning of life?"
@@ -31,16 +32,16 @@ def test_creating_a_tool_using_baseclass():
my_tool = MyCustomTool()
# Assert all the right attributes were defined
assert my_tool.name == "Name of my tool"
assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
assert my_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it."
assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
assert my_tool.run("What is the meaning of life?") == "What is the meaning of life?"
assert my_tool._run("What is the meaning of life?") == "What is the meaning of life?"
# Assert the langchain tool conversion worked as expected
converted_tool = my_tool.to_langchain()
converted_tool = to_langchain([my_tool])[0]
assert converted_tool.name == "Name of my tool"
assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
assert converted_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it."
assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
assert converted_tool.run("What is the meaning of life?") == "What is the meaning of life?"
assert converted_tool.invoke({"question": "What is the meaning of life?"}) == "What is the meaning of life?"
def test_setting_cache_function():
class MyCustomTool(BaseTool):

View File

@@ -0,0 +1,84 @@
import os
import pytest
from crewai_tools import FileReadTool
def test_file_read_tool_constructor():
"""Test FileReadTool initialization with file_path."""
# Create a temporary test file
test_file = "/tmp/test_file.txt"
test_content = "Hello, World!"
with open(test_file, "w") as f:
f.write(test_content)
# Test initialization with file_path
tool = FileReadTool(file_path=test_file)
assert tool.file_path == test_file
assert "test_file.txt" in tool.description
# Clean up
os.remove(test_file)
def test_file_read_tool_run():
"""Test FileReadTool _run method with file_path at runtime."""
# Create a temporary test file
test_file = "/tmp/test_file.txt"
test_content = "Hello, World!"
with open(test_file, "w") as f:
f.write(test_content)
# Test reading file with runtime file_path
tool = FileReadTool()
result = tool._run(file_path=test_file)
assert result == test_content
# Clean up
os.remove(test_file)
def test_file_read_tool_error_handling():
"""Test FileReadTool error handling."""
# Test missing file path
tool = FileReadTool()
result = tool._run()
assert "Error: No file path provided" in result
# Test non-existent file
result = tool._run(file_path="/nonexistent/file.txt")
assert "Error: File not found at path:" in result
# Test permission error (create a file without read permissions)
test_file = "/tmp/no_permission.txt"
with open(test_file, "w") as f:
f.write("test")
os.chmod(test_file, 0o000)
result = tool._run(file_path=test_file)
assert "Error: Permission denied" in result
# Clean up
os.chmod(test_file, 0o666) # Restore permissions to delete
os.remove(test_file)
def test_file_read_tool_constructor_and_run():
"""Test FileReadTool using both constructor and runtime file paths."""
# Create two test files
test_file1 = "/tmp/test1.txt"
test_file2 = "/tmp/test2.txt"
content1 = "File 1 content"
content2 = "File 2 content"
with open(test_file1, "w") as f1, open(test_file2, "w") as f2:
f1.write(content1)
f2.write(content2)
# Test that constructor file_path works
tool = FileReadTool(file_path=test_file1)
result = tool._run()
assert result == content1
# Test that runtime file_path overrides constructor
result = tool._run(file_path=test_file2)
assert result == content2
# Clean up
os.remove(test_file1)
os.remove(test_file2)

View File

@@ -3,7 +3,7 @@ from crewai import Agent, Task, Crew
def test_spider_tool():
spider_tool = SpiderTool()
searcher = Agent(
role="Web Research Expert",
goal="Find related information from specific URL's",
@@ -12,7 +12,7 @@ def test_spider_tool():
verbose=True,
cache=False
)
choose_between_scrape_crawl = Task(
description="Scrape the page of spider.cloud and return a summary of how fast it is",
expected_output="spider.cloud is a fast scraping and crawling tool",
@@ -34,13 +34,13 @@ def test_spider_tool():
crew = Crew(
agents=[searcher],
tasks=[
choose_between_scrape_crawl,
return_metadata,
choose_between_scrape_crawl,
return_metadata,
css_selector
],
verbose=2
verbose=True
)
crew.kickoff()
if __name__ == "__main__":

View File

@@ -0,0 +1,50 @@
from unittest.mock import patch
import pytest
from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
@pytest.fixture
def brave_tool():
return BraveSearchTool(n_results=2)
def test_brave_tool_initialization():
tool = BraveSearchTool()
assert tool.n_results == 10
assert tool.save_file is False
@patch("requests.get")
def test_brave_tool_search(mock_get, brave_tool):
mock_response = {
"web": {
"results": [
{
"title": "Test Title",
"url": "http://test.com",
"description": "Test Description",
}
]
}
}
mock_get.return_value.json.return_value = mock_response
result = brave_tool.run(search_query="test")
assert "Test Title" in result
assert "http://test.com" in result
def test_brave_tool():
tool = BraveSearchTool(
n_results=2,
)
x = tool.run(search_query="ChatGPT")
print(x)
if __name__ == "__main__":
test_brave_tool()
test_brave_tool_initialization()
# test_brave_tool_search(brave_tool)

View File

@@ -0,0 +1,93 @@
from unittest.mock import MagicMock, patch
from bs4 import BeautifulSoup
from crewai_tools.tools.selenium_scraping_tool.selenium_scraping_tool import (
SeleniumScrapingTool,
)
def mock_driver_with_html(html_content):
driver = MagicMock()
mock_element = MagicMock()
mock_element.get_attribute.return_value = html_content
bs = BeautifulSoup(html_content, "html.parser")
mock_element.text = bs.get_text()
driver.find_elements.return_value = [mock_element]
driver.find_element.return_value = mock_element
return driver
def initialize_tool_with(mock_driver):
tool = SeleniumScrapingTool()
tool.driver = MagicMock(return_value=mock_driver)
return tool
def test_tool_initialization():
tool = SeleniumScrapingTool()
assert tool.website_url is None
assert tool.css_element is None
assert tool.cookie is None
assert tool.wait_time == 3
assert tool.return_html is False
@patch("selenium.webdriver.Chrome")
def test_scrape_without_css_selector(_mocked_chrome_driver):
html_content = "<html><body><div>test content</div></body></html>"
mock_driver = mock_driver_with_html(html_content)
tool = initialize_tool_with(mock_driver)
result = tool._run(website_url="https://example.com")
assert "test content" in result
mock_driver.get.assert_called_once_with("https://example.com")
mock_driver.find_element.assert_called_with("tag name", "body")
mock_driver.close.assert_called_once()
@patch("selenium.webdriver.Chrome")
def test_scrape_with_css_selector(_mocked_chrome_driver):
html_content = "<html><body><div>test content</div><div class='test'>test content in a specific div</div></body></html>"
mock_driver = mock_driver_with_html(html_content)
tool = initialize_tool_with(mock_driver)
result = tool._run(website_url="https://example.com", css_element="div.test")
assert "test content in a specific div" in result
mock_driver.get.assert_called_once_with("https://example.com")
mock_driver.find_elements.assert_called_with("css selector", "div.test")
mock_driver.close.assert_called_once()
@patch("selenium.webdriver.Chrome")
def test_scrape_with_return_html_true(_mocked_chrome_driver):
html_content = "<html><body><div>HTML content</div></body></html>"
mock_driver = mock_driver_with_html(html_content)
tool = initialize_tool_with(mock_driver)
result = tool._run(website_url="https://example.com", return_html=True)
assert html_content in result
mock_driver.get.assert_called_once_with("https://example.com")
mock_driver.find_element.assert_called_with("tag name", "body")
mock_driver.close.assert_called_once()
@patch("selenium.webdriver.Chrome")
def test_scrape_with_return_html_false(_mocked_chrome_driver):
html_content = "<html><body><div>HTML content</div></body></html>"
mock_driver = mock_driver_with_html(html_content)
tool = initialize_tool_with(mock_driver)
result = tool._run(website_url="https://example.com", return_html=False)
assert "HTML content" in result
mock_driver.get.assert_called_once_with("https://example.com")
mock_driver.find_element.assert_called_with("tag name", "body")
mock_driver.close.assert_called_once()

View File

@@ -7,32 +7,47 @@ from crewai_tools.tools.code_interpreter_tool.code_interpreter_tool import (
class TestCodeInterpreterTool(unittest.TestCase):
@patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
@patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker_from_env")
def test_run_code_in_docker(self, docker_mock):
tool = CodeInterpreterTool()
code = "print('Hello, World!')"
libraries_used = "numpy,pandas"
libraries_used = ["numpy", "pandas"]
expected_output = "Hello, World!\n"
docker_mock.from_env().containers.run().exec_run().exit_code = 0
docker_mock.from_env().containers.run().exec_run().output = (
docker_mock().containers.run().exec_run().exit_code = 0
docker_mock().containers.run().exec_run().output = (
expected_output.encode()
)
result = tool.run_code_in_docker(code, libraries_used)
self.assertEqual(result, expected_output)
@patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker")
@patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker_from_env")
def test_run_code_in_docker_with_error(self, docker_mock):
tool = CodeInterpreterTool()
code = "print(1/0)"
libraries_used = "numpy,pandas"
libraries_used = ["numpy", "pandas"]
expected_output = "Something went wrong while running the code: \nZeroDivisionError: division by zero\n"
docker_mock.from_env().containers.run().exec_run().exit_code = 1
docker_mock.from_env().containers.run().exec_run().output = (
docker_mock().containers.run().exec_run().exit_code = 1
docker_mock().containers.run().exec_run().output = (
b"ZeroDivisionError: division by zero\n"
)
result = tool.run_code_in_docker(code, libraries_used)
self.assertEqual(result, expected_output)
@patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker_from_env")
def test_run_code_in_docker_with_script(self, docker_mock):
tool = CodeInterpreterTool()
code = """print("This is line 1")
print("This is line 2")"""
libraries_used = [] # No additional libraries needed for this test
expected_output = "This is line 1\nThis is line 2\n"
# Mock Docker responses
docker_mock().containers.run().exec_run().exit_code = 0
docker_mock().containers.run().exec_run().output = expected_output.encode()
result = tool.run_code_in_docker(code, libraries_used)
self.assertEqual(result, expected_output)