diff --git a/README.md b/README.md index 491890877..43cdc9b57 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ In the realm of CrewAI agents, tools are pivotal for enhancing functionality. Th

-[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) +[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) | [Discourse](https://community.crewai.com/)

@@ -51,7 +51,7 @@ There are three ways to create tools for crewAI agents: ### Subclassing `BaseTool` ```python -from crewai_tools import BaseTool +from crewai.tools import BaseTool class MyCustomTool(BaseTool): name: str = "Name of my tool" @@ -70,7 +70,7 @@ Define a new class inheriting from `BaseTool`, specifying `name`, `description`, For a simpler approach, create a `Tool` object directly with the required attributes and a functional logic. ```python -from crewai_tools import tool +from crewai.tools import tool @tool("Name of my tool") def my_tool(question: str) -> str: """Clear description for what this tool is useful for, you agent will need this information to use it.""" @@ -140,6 +140,4 @@ Thank you for your interest in enhancing the capabilities of AI agents through a ## Contact -For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb) or open an issue in this repository. - - +For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb), [Discourse](https://community.crewai.com/) or open an issue in this repository. 
diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 6bd8dfd71..12523a214 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -1,4 +1,5 @@ from .tools import ( + BraveSearchTool, BrowserbaseLoadTool, CodeDocsSearchTool, CodeInterpreterTool, @@ -19,6 +20,7 @@ from .tools import ( LlamaIndexTool, MDXSearchTool, MultiOnTool, + MySQLSearchTool, NL2SQLTool, PDFSearchTool, PGSearchTool, @@ -40,6 +42,5 @@ from .tools import ( XMLSearchTool, YoutubeChannelSearchTool, YoutubeVideoSearchTool, - MySQLSearchTool + WeaviateVectorSearchTool, ) -from .tools.base_tool import BaseTool, Tool, tool diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 9016c57fd..23565dbea 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -1,3 +1,4 @@ +from .brave_search_tool.brave_search_tool import BraveSearchTool from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool @@ -11,10 +12,10 @@ from .exa_tools.exa_search_tool import EXASearchTool from .file_read_tool.file_read_tool import FileReadTool from .file_writer_tool.file_writer_tool import FileWriterTool from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( - FirecrawlCrawlWebsiteTool + FirecrawlCrawlWebsiteTool, ) from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( - FirecrawlScrapeWebsiteTool + FirecrawlScrapeWebsiteTool, ) from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool from .github_search_tool.github_search_tool import GithubSearchTool @@ -22,16 +23,17 @@ from .json_search_tool.json_search_tool import JSONSearchTool from .llamaindex_tool.llamaindex_tool import LlamaIndexTool from .mdx_seach_tool.mdx_search_tool import MDXSearchTool from .multion_tool.multion_tool import 
MultiOnTool +from .mysql_search_tool.mysql_search_tool import MySQLSearchTool from .nl2sql.nl2sql_tool import NL2SQLTool from .pdf_search_tool.pdf_search_tool import PDFSearchTool from .pg_seach_tool.pg_search_tool import PGSearchTool from .rag.rag_tool import RagTool from .scrape_element_from_website.scrape_element_from_website import ( - ScrapeElementFromWebsiteTool + ScrapeElementFromWebsiteTool, ) from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ( - ScrapflyScrapeWebsiteTool + ScrapflyScrapeWebsiteTool, ) from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool from .serper_dev_tool.serper_dev_tool import SerperDevTool @@ -46,7 +48,7 @@ from .vision_tool.vision_tool import VisionTool from .website_search.website_search_tool import WebsiteSearchTool from .xml_search_tool.xml_search_tool import XMLSearchTool from .youtube_channel_search_tool.youtube_channel_search_tool import ( - YoutubeChannelSearchTool + YoutubeChannelSearchTool, ) from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool -from .mysql_search_tool.mysql_search_tool import MySQLSearchTool +from .weaviate_tool.vector_search import WeaviateVectorSearchTool diff --git a/src/crewai_tools/tools/base_tool.py b/src/crewai_tools/tools/base_tool.py deleted file mode 100644 index 674e33030..000000000 --- a/src/crewai_tools/tools/base_tool.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import Any, Callable - -from pydantic import BaseModel as PydanticBaseModel - -from crewai.tools.base_tool import BaseTool -from crewai.tools.structured_tool import CrewStructuredTool - - -class Tool(BaseTool): - func: Callable - """The function that will be executed when the tool is called.""" - - def _run(self, *args: Any, **kwargs: Any) -> Any: - return self.func(*args, **kwargs) - - -def to_langchain( - tools: list[BaseTool | CrewStructuredTool], -) -> list[CrewStructuredTool]: 
- return [t.to_structured_tool() if isinstance(t, BaseTool) else t for t in tools] - - -def tool(*args): - """ - Decorator to create a tool from a function. - """ - - def _make_with_name(tool_name: str) -> Callable: - def _make_tool(f: Callable) -> BaseTool: - if f.__doc__ is None: - raise ValueError("Function must have a docstring") - if f.__annotations__ is None: - raise ValueError("Function must have type annotations") - - class_name = "".join(tool_name.split()).title() - args_schema = type( - class_name, - (PydanticBaseModel,), - { - "__annotations__": { - k: v for k, v in f.__annotations__.items() if k != "return" - }, - }, - ) - - return Tool( - name=tool_name, - description=f.__doc__, - func=f, - args_schema=args_schema, - ) - - return _make_tool - - if len(args) == 1 and callable(args[0]): - return _make_with_name(args[0].__name__)(args[0]) - if len(args) == 1 and isinstance(args[0], str): - return _make_with_name(args[0]) - raise ValueError("Invalid arguments") diff --git a/src/crewai_tools/tools/brave_search_tool/README.md b/src/crewai_tools/tools/brave_search_tool/README.md new file mode 100644 index 000000000..a66210491 --- /dev/null +++ b/src/crewai_tools/tools/brave_search_tool/README.md @@ -0,0 +1,30 @@ +# BraveSearchTool Documentation + +## Description +This tool is designed to perform a web search for a specified query from a text's content across the internet. It utilizes the Brave Web Search API, which is a REST API to query Brave Search and get back search results from the web. The following sections describe how to curate requests, including parameters and headers, to Brave Web Search API and get a JSON response back. 
+ +## Installation +To incorporate this tool into your project, follow the installation instructions below: +```shell +pip install 'crewai[tools]' +``` + +## Example +The following example demonstrates how to initialize the tool and execute a search with a given query: + +```python +from crewai_tools import BraveSearchTool + +# Initialize the tool for internet searching capabilities +tool = BraveSearchTool() +``` + +## Steps to Get Started +To effectively use the `BraveSearchTool`, follow these steps: + +1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment. +2. **API Key Acquisition**: Acquire an API key [here](https://api.search.brave.com/app/keys). +3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool. + +## Conclusion +By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward. 
diff --git a/src/crewai_tools/tools/brave_search_tool/__init__.py b/src/crewai_tools/tools/brave_search_tool/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py new file mode 100644 index 000000000..11035739d --- /dev/null +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -0,0 +1,118 @@ +from datetime import datetime +import os +import time +from typing import Any, ClassVar, Optional, Type + +import requests +from crewai.tools import BaseTool +from pydantic import BaseModel, Field + + +def _save_results_to_file(content: str) -> None: + """Saves the search results to a file.""" + filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt" + with open(filename, "w") as file: + file.write(content) + print(f"Results saved to {filename}") + + +class BraveSearchToolSchema(BaseModel): + """Input for BraveSearchTool.""" + + search_query: str = Field( + ..., description="Mandatory search query you want to use to search the internet" + ) + + +class BraveSearchTool(BaseTool): + """ + BraveSearchTool - A tool for performing web searches using the Brave Search API. + + This module provides functionality to search the internet using Brave's Search API, + supporting customizable result counts and country-specific searches. + + Dependencies: + - requests + - pydantic + - python-dotenv (for API key management) + """ + + name: str = "Brave Web Search the internet" + description: str = ( + "A tool that can be used to search the internet with a search_query." 
+ ) + args_schema: Type[BaseModel] = BraveSearchToolSchema + search_url: str = "https://api.search.brave.com/res/v1/web/search" + country: Optional[str] = "" + n_results: int = 10 + save_file: bool = False + _last_request_time: ClassVar[float] = 0 + _min_request_interval: ClassVar[float] = 1.0 # seconds + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if "BRAVE_API_KEY" not in os.environ: + raise ValueError( + "BRAVE_API_KEY environment variable is required for BraveSearchTool" + ) + + def _run( + self, + **kwargs: Any, + ) -> Any: + current_time = time.time() + if (current_time - self._last_request_time) < self._min_request_interval: + time.sleep( + self._min_request_interval - (current_time - self._last_request_time) + ) + BraveSearchTool._last_request_time = time.time() + try: + search_query = kwargs.get("search_query") or kwargs.get("query") + if not search_query: + raise ValueError("Search query is required") + + save_file = kwargs.get("save_file", self.save_file) + n_results = kwargs.get("n_results", self.n_results) + + payload = {"q": search_query, "count": n_results} + + if self.country != "": + payload["country"] = self.country + + headers = { + "X-Subscription-Token": os.environ["BRAVE_API_KEY"], + "Accept": "application/json", + } + + response = requests.get(self.search_url, headers=headers, params=payload) + response.raise_for_status() # Handle non-200 responses + results = response.json() + + if "web" in results: + results = results["web"]["results"] + string = [] + for result in results: + try: + string.append( + "\n".join( + [ + f"Title: {result['title']}", + f"Link: {result['url']}", + f"Snippet: {result['description']}", + "---", + ] + ) + ) + except KeyError: + continue + + content = "\n".join(string) + except requests.RequestException as e: + return f"Error performing search: {str(e)}" + except KeyError as e: + return f"Error parsing search results: {str(e)}" + if save_file: + _save_results_to_file(content) + return 
f"\nSearch results: {content}\n" + else: + return content diff --git a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py index 514664557..54c33db3c 100644 --- a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py +++ b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class BrowserbaseLoadToolSchema(BaseModel): url: str = Field(description="Website URL") diff --git a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index a4488b35f..61c180fe3 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -3,10 +3,9 @@ import os from typing import List, Optional, Type import docker +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class CodeInterpreterSchema(BaseModel): """Input for CodeInterpreterTool.""" diff --git a/src/crewai_tools/tools/composio_tool/composio_tool.py b/src/crewai_tools/tools/composio_tool/composio_tool.py index 62068c0bd..4823441bf 100644 --- a/src/crewai_tools/tools/composio_tool/composio_tool.py +++ b/src/crewai_tools/tools/composio_tool/composio_tool.py @@ -5,8 +5,7 @@ Composio tools wrapper. 
import typing as t import typing_extensions as te - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool class ComposioTool(BaseTool): diff --git a/src/crewai_tools/tools/dalle_tool/dalle_tool.py b/src/crewai_tools/tools/dalle_tool/dalle_tool.py index da6adb2b1..7040de11a 100644 --- a/src/crewai_tools/tools/dalle_tool/dalle_tool.py +++ b/src/crewai_tools/tools/dalle_tool/dalle_tool.py @@ -1,11 +1,10 @@ import json from typing import Type +from crewai.tools import BaseTool from openai import OpenAI from pydantic import BaseModel -from crewai_tools.tools.base_tool import BaseTool - class ImagePromptSchema(BaseModel): """Input for Dall-E Tool.""" diff --git a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 3d308ba45..6033202be 100644 --- a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -1,10 +1,9 @@ import os from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedDirectoryReadToolSchema(BaseModel): """Input for DirectoryReadTool.""" diff --git a/src/crewai_tools/tools/exa_tools/exa_base_tool.py b/src/crewai_tools/tools/exa_tools/exa_base_tool.py index d2fe6217c..295b283ad 100644 --- a/src/crewai_tools/tools/exa_tools/exa_base_tool.py +++ b/src/crewai_tools/tools/exa_tools/exa_base_tool.py @@ -1,10 +1,8 @@ -import os from typing import Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class EXABaseToolToolSchema(BaseModel): """Input for EXABaseTool.""" diff --git a/src/crewai_tools/tools/exa_tools/exa_search_tool.py b/src/crewai_tools/tools/exa_tools/exa_search_tool.py index 30f77d1ee..6724c2417 100644 --- a/src/crewai_tools/tools/exa_tools/exa_search_tool.py +++ 
b/src/crewai_tools/tools/exa_tools/exa_search_tool.py @@ -1,28 +1,30 @@ import os -import requests from typing import Any +import requests + from .exa_base_tool import EXABaseTool + class EXASearchTool(EXABaseTool): - def _run( - self, - **kwargs: Any, - ) -> Any: - search_query = kwargs.get('search_query') - if search_query is None: - search_query = kwargs.get('query') + def _run( + self, + **kwargs: Any, + ) -> Any: + search_query = kwargs.get("search_query") + if search_query is None: + search_query = kwargs.get("query") - payload = { - "query": search_query, - "type": "magic", - } + payload = { + "query": search_query, + "type": "magic", + } - headers = self.headers.copy() - headers["x-api-key"] = os.environ['EXA_API_KEY'] + headers = self.headers.copy() + headers["x-api-key"] = os.environ["EXA_API_KEY"] - response = requests.post(self.search_url, json=payload, headers=headers) - results = response.json() - if 'results' in results: - results = super()._parse_results(results['results']) - return results + response = requests.post(self.search_url, json=payload, headers=headers) + results = response.json() + if "results" in results: + results = super()._parse_results(results["results"]) + return results diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 265dca54a..fe34c9d8b 100644 --- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedFileReadToolSchema(BaseModel): """Input for FileReadTool.""" diff --git a/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py b/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py index a008e4a75..ed454a1bd 100644 --- a/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py +++ 
b/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py @@ -1,16 +1,18 @@ import os -from typing import Any, Optional, Type -from pydantic import BaseModel -from ..base_tool import BaseTool from distutils.util import strtobool +from typing import Any, Optional, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel class FileWriterToolInput(BaseModel): - filename: str + filename: str directory: Optional[str] = "./" overwrite: str = "False" content: str - + + class FileWriterTool(BaseTool): name: str = "File Writer Tool" description: str = ( @@ -26,7 +28,7 @@ class FileWriterTool(BaseTool): # Construct the full path filepath = os.path.join(kwargs.get("directory") or "", kwargs["filename"]) - + # Convert overwrite to boolean kwargs["overwrite"] = bool(strtobool(kwargs["overwrite"])) @@ -46,4 +48,4 @@ class FileWriterTool(BaseTool): except KeyError as e: return f"An error occurred while accessing key: {str(e)}" except Exception as e: - return f"An error occurred while writing to the file: {str(e)}" \ No newline at end of file + return f"An error occurred while writing to the file: {str(e)}" diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index a535b6c63..c23ff2100 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,8 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field # Type checking import if TYPE_CHECKING: @@ -20,6 +19,9 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): class FirecrawlCrawlWebsiteTool(BaseTool): + model_config = ConfigDict( + arbitrary_types_allowed=True, 
validate_assignment=True, frozen=False + ) name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema @@ -50,3 +52,15 @@ class FirecrawlCrawlWebsiteTool(BaseTool): options = {"crawlerOptions": crawler_options, "pageOptions": page_options} return self.firecrawl.crawl_url(url, options) + + +try: + from firecrawl import FirecrawlApp + + # Must rebuild model after class is defined + FirecrawlCrawlWebsiteTool.model_rebuild() +except ImportError: + """ + When this tool is not used, then exception can be ignored. + """ + pass diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index ee8e592ca..9ab7d293e 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -1,8 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field # Type checking import if TYPE_CHECKING: @@ -24,6 +23,9 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteTool(BaseTool): + model_config = ConfigDict( + arbitrary_types_allowed=True, validate_assignment=True, frozen=False + ) name: str = "Firecrawl web scrape tool" description: str = "Scrape webpages url using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema @@ -61,3 +63,15 @@ class FirecrawlScrapeWebsiteTool(BaseTool): "timeout": timeout, } return self.firecrawl.scrape_url(url, options) + + +try: + from firecrawl import FirecrawlApp + + # Must rebuild model after class is defined + 
FirecrawlScrapeWebsiteTool.model_rebuild() +except ImportError: + """ + When this tool is not used, then exception can be ignored. + """ + pass diff --git a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index 13c3b82ee..5efd274de 100644 --- a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -1,9 +1,8 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - # Type checking import if TYPE_CHECKING: from firecrawl import FirecrawlApp diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/README.md b/src/crewai_tools/tools/jina_scrape_website_tool/README.md new file mode 100644 index 000000000..0278e5aa0 --- /dev/null +++ b/src/crewai_tools/tools/jina_scrape_website_tool/README.md @@ -0,0 +1,38 @@ +# JinaScrapeWebsiteTool + +## Description +A tool designed to extract and read the content of a specified website by using Jina.ai reader. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites. 
+ +## Installation +Install the crewai_tools package +```shell +pip install 'crewai[tools]' +``` + +## Example +```python +from crewai_tools import JinaScrapeWebsiteTool + +# To enable scraping any website it finds during its execution +tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY') + +# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website +tool = JinaScrapeWebsiteTool(website_url='https://www.example.com') + +# With custom headers +tool = JinaScrapeWebsiteTool( + website_url='https://www.example.com', + custom_headers={'X-Target-Selector': 'body, .class, #id'} +) +``` + +## Authentication +The tool uses Jina.ai's reader service. While it can work without an API key, Jina.ai may apply rate limiting or blocking to unauthenticated requests. For production use, it's recommended to provide an API key. + +## Arguments +- `website_url`: Mandatory website URL to read the file. This is the primary input for the tool, specifying which website's content should be scraped and read. +- `api_key`: Optional Jina.ai API key for authenticated access to the reader service. +- `custom_headers`: Optional dictionary of HTTP headers to use when making requests. + +## Note +This tool is an alternative to the standard `ScrapeWebsiteTool` that specifically uses Jina.ai's reader service for enhanced content extraction. Choose this tool when you need more sophisticated content parsing capabilities. 
\ No newline at end of file diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py new file mode 100644 index 000000000..a10a4ffdb --- /dev/null +++ b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -0,0 +1,54 @@ +from typing import Optional, Type + +import requests +from crewai.tools import BaseTool +from pydantic import BaseModel, Field + + +class JinaScrapeWebsiteToolInput(BaseModel): + """Input schema for JinaScrapeWebsiteTool.""" + + website_url: str = Field(..., description="Mandatory website url to read the file") + + +class JinaScrapeWebsiteTool(BaseTool): + name: str = "JinaScrapeWebsiteTool" + description: str = ( + "A tool that can be used to read a website content using Jina.ai reader and return markdown content." + ) + args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput + website_url: Optional[str] = None + api_key: Optional[str] = None + headers: dict = {} + + def __init__( + self, + website_url: Optional[str] = None, + api_key: Optional[str] = None, + custom_headers: Optional[dict] = None, + **kwargs, + ): + super().__init__(**kwargs) + if website_url is not None: + self.website_url = website_url + self.description = f"A tool that can be used to read {website_url}'s content and return markdown content." 
+ self._generate_description() + + if custom_headers is not None: + self.headers = custom_headers + + if api_key is not None: + self.headers["Authorization"] = f"Bearer {api_key}" + + def _run(self, website_url: Optional[str] = None) -> str: + url = website_url or self.website_url + if not url: + raise ValueError( + "Website URL must be provided either during initialization or execution" + ) + + response = requests.get( + f"https://r.jina.ai/{url}", headers=self.headers, timeout=15 + ) + response.raise_for_status() + return response.text diff --git a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py index af5c93e1f..61a747956 100644 --- a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py +++ b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type, cast +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class LlamaIndexTool(BaseTool): """Tool to wrap LlamaIndex tools/query engines.""" diff --git a/src/crewai_tools/tools/multion_tool/multion_tool.py b/src/crewai_tools/tools/multion_tool/multion_tool.py index 2dc944f23..a991074da 100644 --- a/src/crewai_tools/tools/multion_tool/multion_tool.py +++ b/src/crewai_tools/tools/multion_tool/multion_tool.py @@ -2,7 +2,7 @@ from typing import Any, Optional -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool class MultiOnTool(BaseTool): diff --git a/src/crewai_tools/tools/nl2sql/nl2sql_tool.py b/src/crewai_tools/tools/nl2sql/nl2sql_tool.py index 22c3a299b..786550ee7 100644 --- a/src/crewai_tools/tools/nl2sql/nl2sql_tool.py +++ b/src/crewai_tools/tools/nl2sql/nl2sql_tool.py @@ -1,11 +1,10 @@ -from typing import Any, Union +from typing import Any, Type, Union -from ..base_tool import BaseTool +from crewai.tools import BaseTool from pydantic import BaseModel, Field from sqlalchemy import 
create_engine, text from sqlalchemy.orm import sessionmaker -from typing import Type, Any class NL2SQLToolInput(BaseModel): sql_query: str = Field( @@ -13,6 +12,7 @@ class NL2SQLToolInput(BaseModel): description="The SQL query to execute.", ) + class NL2SQLTool(BaseTool): name: str = "NL2SQLTool" description: str = "Converts natural language to SQL queries and executes them." diff --git a/src/crewai_tools/tools/rag/rag_tool.py b/src/crewai_tools/tools/rag/rag_tool.py index 97291cd81..a9bbdab53 100644 --- a/src/crewai_tools/tools/rag/rag_tool.py +++ b/src/crewai_tools/tools/rag/rag_tool.py @@ -1,10 +1,9 @@ from abc import ABC, abstractmethod from typing import Any +from crewai.tools import BaseTool from pydantic import BaseModel, Field, model_validator -from crewai_tools.tools.base_tool import BaseTool - class Adapter(BaseModel, ABC): class Config: diff --git a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index 56bb27195..14757d247 100644 --- a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type import requests from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedScrapeElementFromWebsiteToolSchema(BaseModel): """Input for ScrapeElementFromWebsiteTool.""" diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 7173c2156..8cfc5d136 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -1,12 +1,12 @@ import os +import re from typing import Any, Optional, Type import requests from bs4 import 
BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedScrapeWebsiteToolSchema(BaseModel): """Input for ScrapeWebsiteTool.""" @@ -67,7 +67,7 @@ class ScrapeWebsiteTool(BaseTool): page.encoding = page.apparent_encoding parsed = BeautifulSoup(page.text, "html.parser") - text = parsed.get_text() - text = "\n".join([i for i in text.split("\n") if i.strip() != ""]) - text = " ".join([i for i in text.split(" ") if i.strip() != ""]) + text = parsed.get_text(" ") + text = re.sub("[ \t]+", " ", text) + text = re.sub("\\s+\n\\s+", "\n", text) return text diff --git a/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index 5800e223c..b47ce8e5b 100644 --- a/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -1,10 +1,9 @@ import logging from typing import Any, Dict, Literal, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - logger = logging.getLogger(__file__) diff --git a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py index 970cde7ca..47910f35b 100644 --- a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py +++ b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py @@ -1,14 +1,12 @@ import time from typing import Any, Optional, Type -from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By -from ..base_tool import BaseTool - class 
FixedSeleniumScrapingToolSchema(BaseModel): """Input for SeleniumScrapingTool.""" diff --git a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py index b23884180..5e8986c7e 100644 --- a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py +++ b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -5,16 +5,15 @@ import logging from typing import Any, Type import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai.tools import BaseTool logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) logger = logging.getLogger(__name__) - def _save_results_to_file(content: str) -> None: """Saves the search results to a file.""" try: diff --git a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py index 21e6e9872..c058091a2 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SerplyNewsSearchToolSchema(BaseModel): """Input for Serply News Search.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py index 1ac6337f6..3ed9de4ab 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import 
BaseTool - class SerplyScholarSearchToolSchema(BaseModel): """Input for Serply Scholar Search.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py index b65fa21d1..b4d1ae4b5 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SerplyWebSearchToolSchema(BaseModel): """Input for Serply Web Search.""" diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py b/src/crewai_tools/tools/spider_tool/spider_tool.py index c01b5e2a3..94da9f6fe 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -1,9 +1,8 @@ from typing import Any, Dict, Literal, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SpiderToolSchema(BaseModel): url: str = Field(description="Website URL") diff --git a/src/crewai_tools/tools/vision_tool/vision_tool.py b/src/crewai_tools/tools/vision_tool/vision_tool.py index 6b7a21dbd..3ac3c3ae5 100644 --- a/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -2,11 +2,10 @@ import base64 from typing import Type import requests +from crewai.tools import BaseTool from openai import OpenAI from pydantic import BaseModel -from crewai_tools.tools.base_tool import BaseTool - class ImagePromptSchema(BaseModel): """Input for Vision Tool.""" diff --git a/src/crewai_tools/tools/weaviate_tool/README.md b/src/crewai_tools/tools/weaviate_tool/README.md new file mode 100644 index 000000000..c48f2f70a --- /dev/null +++ 
b/src/crewai_tools/tools/weaviate_tool/README.md @@ -0,0 +1,80 @@ +# WeaviateVectorSearchTool + +## Description +This tool is specifically crafted for conducting semantic searches within docs within a Weaviate vector database. Use this tool to find semantically similar docs to a given query. + +Weaviate is a vector database that is used to store and query vector embeddings. You can follow their docs here: https://weaviate.io/developers/wcs/connect + +## Installation +Install the crewai_tools package by executing the following command in your terminal: + +```shell +uv pip install 'crewai[tools]' +``` + +## Example +To utilize the WeaviateVectorSearchTool for different use cases, follow these examples: + +```python +from crewai_tools import WeaviateVectorSearchTool + +# To enable the tool to search any website the agent comes across or learns about during its operation +tool = WeaviateVectorSearchTool( + collection_name='example_collections', + limit=3, + weaviate_cluster_url="https://your-weaviate-cluster-url.com", + weaviate_api_key="your-weaviate-api-key", +) + +# or + +# Setup custom model for vectorizer and generative model +tool = WeaviateVectorSearchTool( + collection_name='example_collections', + limit=3, + vectorizer=Configure.Vectorizer.text2vec_openai(model="nomic-embed-text"), + generative_model=Configure.Generative.openai(model="gpt-4o-mini"), + weaviate_cluster_url="https://your-weaviate-cluster-url.com", + weaviate_api_key="your-weaviate-api-key", +) + +# Adding the tool to an agent +rag_agent = Agent( + name="rag_agent", + role="You are a helpful assistant that can answer questions with the help of the WeaviateVectorSearchTool.", + llm="gpt-4o-mini", + tools=[tool], +) +``` + +## Arguments +- `collection_name` : The name of the collection to search within. (Required) +- `weaviate_cluster_url` : The URL of the Weaviate cluster. (Required) +- `weaviate_api_key` : The API key for the Weaviate cluster. 
(Required) +- `limit` : The number of results to return. (Optional) +- `vectorizer` : The vectorizer to use. (Optional) +- `generative_model` : The generative model to use. (Optional) + +Preloading the Weaviate database with documents: + +```python +from crewai_tools import WeaviateVectorSearchTool + +# Use before hooks to generate the documents and add them to the Weaviate database. Follow the weaviate docs: https://weaviate.io/developers/wcs/connect +test_docs = client.collections.get("example_collections") + + +docs_to_load = os.listdir("knowledge") +with test_docs.batch.dynamic() as batch: + for d in docs_to_load: + with open(os.path.join("knowledge", d), "r") as f: + content = f.read() + batch.add_object( + { + "content": content, + "year": d.split("_")[0], + } + ) +tool = WeaviateVectorSearchTool(collection_name='example_collections', limit=3) + +``` diff --git a/src/crewai_tools/tools/weaviate_tool/vector_search.py b/src/crewai_tools/tools/weaviate_tool/vector_search.py new file mode 100644 index 000000000..ab80b6ce1 --- /dev/null +++ b/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -0,0 +1,89 @@ +import os +import json +import weaviate +from pydantic import BaseModel, Field +from typing import Type, Optional +from crewai.tools import BaseTool + +from weaviate.classes.config import Configure, Vectorizers +from weaviate.classes.init import Auth + + +class WeaviateToolSchema(BaseModel): + """Input for WeaviateTool.""" + + query: str = Field( + ..., + description="The query to search retrieve relevant information from the Weaviate database. Pass only the query, not the question.", + ) + + +class WeaviateVectorSearchTool(BaseTool): + """Tool to search the Weaviate database""" + + name: str = "WeaviateVectorSearchTool" + description: str = "A tool to search the Weaviate database for relevant information on internal documents." 
+ args_schema: Type[BaseModel] = WeaviateToolSchema + query: Optional[str] = None + + vectorizer: Optional[Vectorizers] = Field( + default=Configure.Vectorizer.text2vec_openai( + model="nomic-embed-text", + ) + ) + generative_model: Optional[str] = Field( + default=Configure.Generative.openai( + model="gpt-4o", + ), + ) + collection_name: Optional[str] = None + limit: Optional[int] = Field(default=3) + headers: Optional[dict] = Field( + default={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]} + ) + weaviate_cluster_url: str = Field( + ..., + description="The URL of the Weaviate cluster", + ) + weaviate_api_key: str = Field( + ..., + description="The API key for the Weaviate cluster", + ) + + def _run(self, query: str) -> str: + """Search the Weaviate database + + Args: + query (str): The query to search retrieve relevant information from the Weaviate database. Pass only the query as a string, not the question. + + Returns: + str: The result of the search query + """ + + if not self.weaviate_cluster_url or not self.weaviate_api_key: + raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set") + + client = weaviate.connect_to_weaviate_cloud( + cluster_url=self.weaviate_cluster_url, + auth_credentials=Auth.api_key(self.weaviate_api_key), + headers=self.headers, + ) + internal_docs = client.collections.get(self.collection_name) + + if not internal_docs: + internal_docs = client.collections.create( + name=self.collection_name, + vectorizer_config=self.vectorizer, + generative_config=self.generative_model, + ) + + response = internal_docs.query.near_text( + query=query, + limit=self.limit, + ) + json_response = "" + for obj in response.objects: + json_response += json.dumps(obj.properties, indent=2) + + client.close() + return json_response diff --git a/tests/tools/brave_search_tool_test.py b/tests/tools/brave_search_tool_test.py new file mode 100644 index 000000000..36300f723 --- /dev/null +++ b/tests/tools/brave_search_tool_test.py @@ -0,0 +1,50 @@ +from 
unittest.mock import patch + +import pytest + +from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool + + +@pytest.fixture +def brave_tool(): + return BraveSearchTool(n_results=2) + + +def test_brave_tool_initialization(): + tool = BraveSearchTool() + assert tool.n_results == 10 + assert tool.save_file is False + + +@patch("requests.get") +def test_brave_tool_search(mock_get, brave_tool): + mock_response = { + "web": { + "results": [ + { + "title": "Test Title", + "url": "http://test.com", + "description": "Test Description", + } + ] + } + } + mock_get.return_value.json.return_value = mock_response + + result = brave_tool.run(search_query="test") + assert "Test Title" in result + assert "http://test.com" in result + + +def test_brave_tool(): + tool = BraveSearchTool( + n_results=2, + ) + x = tool.run(search_query="ChatGPT") + print(x) + + +if __name__ == "__main__": + test_brave_tool() + test_brave_tool_initialization() + # test_brave_tool_search(brave_tool)