From 96e52767ad417738c684e6ff7470ce25a458634e Mon Sep 17 00:00:00 2001 From: Ernest Poletaev Date: Fri, 25 Oct 2024 22:03:59 +0700 Subject: [PATCH 01/18] fix: web scraper concatenate words --- .../tools/scrape_website_tool/scrape_website_tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 7173c2156..3cfb67bae 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -1,4 +1,5 @@ import os +import re from typing import Any, Optional, Type import requests @@ -67,7 +68,6 @@ class ScrapeWebsiteTool(BaseTool): page.encoding = page.apparent_encoding parsed = BeautifulSoup(page.text, "html.parser") - text = parsed.get_text() - text = "\n".join([i for i in text.split("\n") if i.strip() != ""]) - text = " ".join([i for i in text.split(" ") if i.strip() != ""]) + text = parsed.get_text(" ") + text = re.sub('\s+', ' ', text) return text From 1f8791953e41194fe0c34761076096824c844bf8 Mon Sep 17 00:00:00 2001 From: Ernest Poletaev Date: Fri, 25 Oct 2024 22:33:24 +0700 Subject: [PATCH 02/18] fix: retain line breaks --- .../tools/scrape_website_tool/scrape_website_tool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 3cfb67bae..99df1d2dd 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -69,5 +69,6 @@ class ScrapeWebsiteTool(BaseTool): parsed = BeautifulSoup(page.text, "html.parser") text = parsed.get_text(" ") - text = re.sub('\s+', ' ', text) + text = re.sub('[ \t]+', ' ', text) + text = re.sub('\\s+\n\\s+', '\n', text) return text From 
15970734e3690c81198ea238543d6a08000dd242 Mon Sep 17 00:00:00 2001 From: Mike Plachta Date: Wed, 27 Nov 2024 17:52:56 -0800 Subject: [PATCH 03/18] Jina Website Scraper v1 --- .../tools/jina_scrape_website_tool/README.md | 38 ++++++++++++++ .../jina_scrape_website_tool.py | 52 +++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 src/crewai_tools/tools/jina_scrape_website_tool/README.md create mode 100644 src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/README.md b/src/crewai_tools/tools/jina_scrape_website_tool/README.md new file mode 100644 index 000000000..0278e5aa0 --- /dev/null +++ b/src/crewai_tools/tools/jina_scrape_website_tool/README.md @@ -0,0 +1,38 @@ +# JinaScrapeWebsiteTool + +## Description +A tool designed to extract and read the content of a specified website by using Jina.ai reader. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites. + +## Installation +Install the crewai_tools package +```shell +pip install 'crewai[tools]' +``` + +## Example +```python +from crewai_tools import JinaScrapeWebsiteTool + +# To enable scraping any website it finds during its execution +tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY') + +# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website +tool = JinaScrapeWebsiteTool(website_url='https://www.example.com') + +# With custom headers +tool = JinaScrapeWebsiteTool( + website_url='https://www.example.com', + custom_headers={'X-Target-Selector': 'body, .class, #id'} +) +``` + +## Authentication +The tool uses Jina.ai's reader service. While it can work without an API key, Jina.ai may apply rate limiting or blocking to unauthenticated requests. 
For production use, it's recommended to provide an API key. + +## Arguments +- `website_url`: Mandatory website URL to read the file. This is the primary input for the tool, specifying which website's content should be scraped and read. +- `api_key`: Optional Jina.ai API key for authenticated access to the reader service. +- `custom_headers`: Optional dictionary of HTTP headers to use when making requests. + +## Note +This tool is an alternative to the standard `ScrapeWebsiteTool` that specifically uses Jina.ai's reader service for enhanced content extraction. Choose this tool when you need more sophisticated content parsing capabilities. \ No newline at end of file diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py new file mode 100644 index 000000000..7fec77938 --- /dev/null +++ b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -0,0 +1,52 @@ +import requests +from typing import Type, Optional +from crewai_tools import BaseTool +from pydantic import BaseModel, Field + + +class JinaScrapeWebsiteToolInput(BaseModel): + """Input schema for JinaScrapeWebsiteTool.""" + website_url: str = Field(..., description="Mandatory website url to read the file") + + +class JinaScrapeWebsiteTool(BaseTool): + name: str = "JinaScrapeWebsiteTool" + description: str = "A tool that can be used to read a website content using Jina.ai reader and return markdown content." 
+ args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput + website_url: Optional[str] = None + api_key: Optional[str] = None + headers: dict = {} + + def __init__( + self, + website_url: Optional[str] = None, + api_key: Optional[str] = None, + custom_headers: Optional[dict] = None, + **kwargs + ): + super().__init__(**kwargs) + if website_url is not None: + self.website_url = website_url + self.description = ( + f"A tool that can be used to read {website_url}'s content and return markdown content." + ) + self._generate_description() + + if custom_headers is not None: + self.headers = custom_headers + + if api_key is not None: + self.headers["Authorization"] = f"Bearer {api_key}" + + def _run(self, website_url: Optional[str] = None) -> str: + url = website_url or self.website_url + if not url: + raise ValueError("Website URL must be provided either during initialization or execution") + + response = requests.get( + f"https://r.jina.ai/{url}", + headers=self.headers, + timeout=15 + ) + response.raise_for_status() + return response.text From dd18c59a9bbb2d73842f041ee7c0717f76658d3d Mon Sep 17 00:00:00 2001 From: Mike Plachta Date: Wed, 27 Nov 2024 19:57:52 -0800 Subject: [PATCH 04/18] Update jina_scrape_website_tool.py --- .../tools/jina_scrape_website_tool/jina_scrape_website_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index 7fec77938..d887c085d 100644 --- a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -1,6 +1,6 @@ import requests from typing import Type, Optional -from crewai_tools import BaseTool +from ..base_tool import BaseTool from pydantic import BaseModel, Field From a94470772fbfad0e538858b8d245c89cfab1dde8 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 
28 Nov 2024 19:09:44 -0500 Subject: [PATCH 05/18] Fix pydantic related errors on FirecrawlScrapeWebsiteTool --- .../firecrawl_scrape_website_tool.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index ee8e592ca..89478976d 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from crewai_tools.tools.base_tool import BaseTool @@ -24,6 +24,7 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteTool(BaseTool): + model_config = ConfigDict(arbitrary_types_allowed=True) name: str = "Firecrawl web scrape tool" description: str = "Scrape webpages url using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema @@ -61,3 +62,11 @@ class FirecrawlScrapeWebsiteTool(BaseTool): "timeout": timeout, } return self.firecrawl.scrape_url(url, options) + +try: + from firecrawl import FirecrawlApp + FirecrawlScrapeWebsiteTool.model_rebuild() +except ImportError: + raise ImportError( + "`firecrawl` package not found, please run `pip install firecrawl-py`" + ) \ No newline at end of file From 49ad43ff083c924e3ed510b84999efd3ba0144c8 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 29 Nov 2024 05:51:55 -0500 Subject: [PATCH 06/18] Do not reraise exception Tool use is optional and missing dependency should not raise error --- .../firecrawl_scrape_website_tool.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 89478976d..cb00a46c6 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -65,8 +65,10 @@ class FirecrawlScrapeWebsiteTool(BaseTool): try: from firecrawl import FirecrawlApp + # Must rebuild model after class is defined FirecrawlScrapeWebsiteTool.model_rebuild() except ImportError: - raise ImportError( - "`firecrawl` package not found, please run `pip install firecrawl-py`" - ) \ No newline at end of file + """ + When this tool is not used, then exception can be ignored. + """ + pass \ No newline at end of file From e677a271e53afa265a55f5ad77f85b4cceba9ee4 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 29 Nov 2024 05:57:09 -0500 Subject: [PATCH 07/18] More explicit model config --- .../firecrawl_scrape_website_tool.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index cb00a46c6..84b61209b 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -24,7 +24,11 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteTool(BaseTool): - model_config = ConfigDict(arbitrary_types_allowed=True) + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + frozen=False + ) name: str = "Firecrawl web scrape tool" description: str = "Scrape webpages url using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema @@ -63,6 
+67,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool): } return self.firecrawl.scrape_url(url, options) + try: from firecrawl import FirecrawlApp # Must rebuild model after class is defined @@ -71,4 +76,4 @@ except ImportError: """ When this tool is not used, then exception can be ignored. """ - pass \ No newline at end of file + pass From eed6a38ea4060e44d55a072cef4a47dfcb0ff266 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 29 Nov 2024 11:41:47 -0500 Subject: [PATCH 08/18] Fix pydantic related errors. --- .../firecrawl_crawl_website_tool.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index a535b6c63..672656fc9 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from crewai_tools.tools.base_tool import BaseTool @@ -20,6 +20,11 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): class FirecrawlCrawlWebsiteTool(BaseTool): + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + frozen=False + ) name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema @@ -50,3 +55,14 @@ class FirecrawlCrawlWebsiteTool(BaseTool): options = {"crawlerOptions": crawler_options, "pageOptions": page_options} return self.firecrawl.crawl_url(url, options) + + +try: + from firecrawl import FirecrawlApp + # Must rebuild model after class is defined + FirecrawlCrawlWebsiteTool.model_rebuild() +except ImportError: + """ + When this 
tool is not used, then exception can be ignored. + """ + pass \ No newline at end of file From 6c242ef3bbfe722d4159e60c931d20e2a38a0570 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 14:04:06 -0600 Subject: [PATCH 09/18] add brave search tool --- .../tools/brave_search_tool/README.md | 30 +++++++ .../brave_search_tool/brave_search_tool.py | 82 +++++++++++++++++++ tests/tools/brave_search_tool_test.py | 13 +++ 3 files changed, 125 insertions(+) create mode 100644 src/crewai_tools/tools/brave_search_tool/README.md create mode 100644 src/crewai_tools/tools/brave_search_tool/brave_search_tool.py create mode 100644 tests/tools/brave_search_tool_test.py diff --git a/src/crewai_tools/tools/brave_search_tool/README.md b/src/crewai_tools/tools/brave_search_tool/README.md new file mode 100644 index 000000000..a66210491 --- /dev/null +++ b/src/crewai_tools/tools/brave_search_tool/README.md @@ -0,0 +1,30 @@ +# BraveSearchTool Documentation + +## Description +This tool is designed to perform a web search for a specified query from a text's content across the internet. It utilizes the Brave Web Search API, which is a REST API to query Brave Search and get back search results from the web. The following sections describe how to curate requests, including parameters and headers, to Brave Web Search API and get a JSON response back. + +## Installation +To incorporate this tool into your project, follow the installation instructions below: +```shell +pip install 'crewai[tools]' +``` + +## Example +The following example demonstrates how to initialize the tool and execute a search with a given query: + +```python +from crewai_tools import BraveSearchTool + +# Initialize the tool for internet searching capabilities +tool = BraveSearchTool() +``` + +## Steps to Get Started +To effectively use the `BraveSearchTool`, follow these steps: + +1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment. +2. 
**API Key Acquisition**: Acquire an API key [here](https://api.search.brave.com/app/keys). +3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool. + +## Conclusion +By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward. diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py new file mode 100644 index 000000000..54f546f1e --- /dev/null +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -0,0 +1,82 @@ +import datetime +import os +from typing import Any, Optional, Type + +import requests +from pydantic import BaseModel, Field + +from crewai_tools.tools.base_tool import BaseTool + + +def _save_results_to_file(content: str) -> None: + """Saves the search results to a file.""" + filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt" + with open(filename, "w") as file: + file.write(content) + print(f"Results saved to {filename}") + + +class BraveSearchToolSchema(BaseModel): + """Input for BraveSearchTool.""" + + search_query: str = Field( + ..., description="Mandatory search query you want to use to search the internet" + ) + + +class BraveSearchTool(BaseTool): + name: str = "Search the internet" + description: str = ( + "A tool that can be used to search the internet with a search_query."
+ ) + args_schema: Type[BaseModel] = BraveSearchToolSchema + search_url: str = "https://api.search.brave.com/res/v1/web/search" + country: Optional[str] = "" + n_results: int = 10 + save_file: bool = False + + def _run( + self, + **kwargs: Any, + ) -> Any: + search_query = kwargs.get("search_query") or kwargs.get("query") + save_file = kwargs.get("save_file", self.save_file) + n_results = kwargs.get("n_results", self.n_results) + + payload = {"q": search_query, "count": n_results} + + if self.country != "": + payload["country"] = self.country + + headers = { + "X-Subscription-Token": os.environ["BRAVE_API_KEY"], + "Accept": "application/json", + } + + response = requests.get(self.search_url, headers=headers, params=payload) + results = response.json() + + if "web" in results: + results = results["web"]["results"] + string = [] + for result in results: + try: + string.append( + "\n".join( + [ + f"Title: {result['title']}", + f"Link: {result['url']}", + f"Snippet: {result['description']}", + "---", + ] + ) + ) + except KeyError: + continue + + content = "\n".join(string) + if save_file: + _save_results_to_file(content) + return f"\nSearch results: {content}\n" + else: + return results diff --git a/tests/tools/brave_search_tool_test.py b/tests/tools/brave_search_tool_test.py new file mode 100644 index 000000000..16c1bcb92 --- /dev/null +++ b/tests/tools/brave_search_tool_test.py @@ -0,0 +1,13 @@ +from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool + + +def test_brave_tool(): + tool = BraveSearchTool( + n_results=2, + ) + + print(tool.run(search_query="ChatGPT")) + + +if __name__ == "__main__": + test_brave_tool() From d168b8e24554e37a706d0af18c4b82af483fd442 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 21:36:28 -0600 Subject: [PATCH 10/18] add error handling --- .../tools/brave_search_tool/__init__.py | 0 .../brave_search_tool/brave_search_tool.py | 90 ++++++++++++------- tests/tools/brave_search_tool_test.py | 37 
++++++++ 3 files changed, 96 insertions(+), 31 deletions(-) create mode 100644 src/crewai_tools/tools/brave_search_tool/__init__.py diff --git a/src/crewai_tools/tools/brave_search_tool/__init__.py b/src/crewai_tools/tools/brave_search_tool/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 54f546f1e..6a8818d75 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -25,6 +25,18 @@ class BraveSearchToolSchema(BaseModel): class BraveSearchTool(BaseTool): + """ + BraveSearchTool - A tool for performing web searches using the Brave Search API. + + This module provides functionality to search the internet using Brave's Search API, + supporting customizable result counts and country-specific searches. + + Dependencies: + - requests + - pydantic + - python-dotenv (for API key management) + """ + name: str = "Search the internet" description: str = ( "A tool that can be used to search the internet with a search_query." 
@@ -35,48 +47,64 @@ class BraveSearchTool(BaseTool): n_results: int = 10 save_file: bool = False + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if "BRAVE_API_KEY" not in os.environ: + raise ValueError( + "BRAVE_API_KEY environment variable is required for BraveSearchTool" + ) + def _run( self, **kwargs: Any, ) -> Any: - search_query = kwargs.get("search_query") or kwargs.get("query") - save_file = kwargs.get("save_file", self.save_file) - n_results = kwargs.get("n_results", self.n_results) + try: + search_query = kwargs.get("search_query") or kwargs.get("query") + if not search_query: + raise ValueError("Search query is required") - payload = {"q": search_query, "count": n_results} + save_file = kwargs.get("save_file", self.save_file) + n_results = kwargs.get("n_results", self.n_results) - if self.country != "": - payload["country"] = self.country + payload = {"q": search_query, "count": n_results} - headers = { - "X-Subscription-Token": os.environ["BRAVE_API_KEY"], - "Accept": "application/json", - } + if self.country != "": + payload["country"] = self.country - response = requests.get(self.search_url, headers=headers, params=payload) - results = response.json() + headers = { + "X-Subscription-Token": os.environ["BRAVE_API_KEY"], + "Accept": "application/json", + } - if "web" in results: - results = results["web"]["results"] - string = [] - for result in results: - try: - string.append( - "\n".join( - [ - f"Title: {result['title']}", - f"Link: {result['url']}", - f"Snippet: {result['description']}", - "---", - ] + response = requests.get(self.search_url, headers=headers, params=payload) + response.raise_for_status() # Handle non-200 responses + results = response.json() + + if "web" in results: + results = results["web"]["results"] + string = [] + for result in results: + try: + string.append( + "\n".join( + [ + f"Title: {result['title']}", + f"Link: {result['url']}", + f"Snippet: {result['description']}", + "---", + ] + ) ) - ) - 
except KeyError: - continue + except KeyError: + continue content = "\n".join(string) - if save_file: - _save_results_to_file(content) + except requests.RequestException as e: + return f"Error performing search: {str(e)}" + except KeyError as e: + return f"Error parsing search results: {str(e)}" + if save_file: + _save_results_to_file(content) return f"\nSearch results: {content}\n" else: - return results + return content diff --git a/tests/tools/brave_search_tool_test.py b/tests/tools/brave_search_tool_test.py index 16c1bcb92..969bd48fe 100644 --- a/tests/tools/brave_search_tool_test.py +++ b/tests/tools/brave_search_tool_test.py @@ -1,6 +1,41 @@ +from unittest.mock import patch + +import pytest + from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool +@pytest.fixture +def brave_tool(): + return BraveSearchTool(n_results=2) + + +def test_brave_tool_initialization(): + tool = BraveSearchTool() + assert tool.n_results == 10 + assert tool.save_file is False + + +@patch("requests.get") +def test_brave_tool_search(mock_get, brave_tool): + mock_response = { + "web": { + "results": [ + { + "title": "Test Title", + "url": "http://test.com", + "description": "Test Description", + } + ] + } + } + mock_get.return_value.json.return_value = mock_response + + result = brave_tool.run(search_query="test") + assert "Test Title" in result + assert "http://test.com" in result + + def test_brave_tool(): tool = BraveSearchTool( n_results=2, @@ -11,3 +46,5 @@ def test_brave_tool(): if __name__ == "__main__": test_brave_tool() + test_brave_tool_initialization() + # test_brave_tool_search(brave_tool) From 5532ea8ff72993860b85326d7299351a0b23c3b5 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 21:51:46 -0600 Subject: [PATCH 11/18] add lru caching --- src/crewai_tools/tools/brave_search_tool/brave_search_tool.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py 
b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 6a8818d75..5ff451484 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -1,5 +1,6 @@ import datetime import os +from functools import lru_cache from typing import Any, Optional, Type import requests @@ -54,6 +55,7 @@ class BraveSearchTool(BaseTool): "BRAVE_API_KEY environment variable is required for BraveSearchTool" ) + @lru_cache(maxsize=100) def _run( self, **kwargs: Any, From e7e059d02a4fa09f2b13873643a4ce38c4c45dc2 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 22:08:29 -0600 Subject: [PATCH 12/18] add rate limiting --- .../tools/brave_search_tool/brave_search_tool.py | 13 ++++++++++--- tests/tools/brave_search_tool_test.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 5ff451484..8d6a9a182 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -1,7 +1,7 @@ import datetime import os -from functools import lru_cache -from typing import Any, Optional, Type +import time +from typing import Any, ClassVar, Optional, Type import requests from pydantic import BaseModel, Field @@ -47,6 +47,8 @@ class BraveSearchTool(BaseTool): country: Optional[str] = "" n_results: int = 10 save_file: bool = False + _last_request_time: ClassVar[float] = 0 + _min_request_interval: ClassVar[float] = 1.0 # seconds def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -55,11 +57,16 @@ class BraveSearchTool(BaseTool): "BRAVE_API_KEY environment variable is required for BraveSearchTool" ) - @lru_cache(maxsize=100) def _run( self, **kwargs: Any, ) -> Any: + current_time = time.time() + if (current_time - self._last_request_time) < 
self._min_request_interval: + time.sleep( + self._min_request_interval - (current_time - self._last_request_time) + ) + BraveSearchTool._last_request_time = time.time() try: search_query = kwargs.get("search_query") or kwargs.get("query") if not search_query: diff --git a/tests/tools/brave_search_tool_test.py b/tests/tools/brave_search_tool_test.py index 969bd48fe..36300f723 100644 --- a/tests/tools/brave_search_tool_test.py +++ b/tests/tools/brave_search_tool_test.py @@ -40,8 +40,8 @@ def test_brave_tool(): tool = BraveSearchTool( n_results=2, ) - - print(tool.run(search_query="ChatGPT")) + x = tool.run(search_query="ChatGPT") + print(x) if __name__ == "__main__": From 95cc6835a130a35fab52bae5c9e41e7073fc0ef0 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 22:30:31 -0600 Subject: [PATCH 13/18] update name --- src/crewai_tools/tools/brave_search_tool/brave_search_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 8d6a9a182..dceff1d57 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -38,7 +38,7 @@ class BraveSearchTool(BaseTool): - python-dotenv (for API key management) """ - name: str = "Search the internet" + name: str = "Brave Web Search the internet" description: str = ( "A tool that can be used to search the internet with a search_query." ) From e0d3ee5b23b1999e4a8b01904e61fc19c85a41c0 Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Tue, 3 Dec 2024 20:35:23 +0800 Subject: [PATCH 14/18] docs: add Discourse community link to contact section Add link to Discourse community platform in the contact section to provide users with an additional support channel alongside Discord. 
--- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 491890877..aca20f640 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ In the realm of CrewAI agents, tools are pivotal for enhancing functionality. Th

-[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) +[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) | [Discourse](https://community.crewai.com/)

@@ -140,6 +140,4 @@ Thank you for your interest in enhancing the capabilities of AI agents through a ## Contact -For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb) or open an issue in this repository. - - +For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb), [Discourse](https://community.crewai.com/) or open an issue in this repository. From a64cccbd724a9c24fc825a16de0ec1de2fc39ed9 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Wed, 4 Dec 2024 22:28:30 -0600 Subject: [PATCH 15/18] add BraveSearchTool to init --- src/crewai_tools/__init__.py | 3 ++- src/crewai_tools/tools/__init__.py | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 6bd8dfd71..5f9a81d9c 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -1,4 +1,5 @@ from .tools import ( + BraveSearchTool, BrowserbaseLoadTool, CodeDocsSearchTool, CodeInterpreterTool, @@ -19,6 +20,7 @@ from .tools import ( LlamaIndexTool, MDXSearchTool, MultiOnTool, + MySQLSearchTool, NL2SQLTool, PDFSearchTool, PGSearchTool, @@ -40,6 +42,5 @@ from .tools import ( XMLSearchTool, YoutubeChannelSearchTool, YoutubeVideoSearchTool, - MySQLSearchTool ) from .tools.base_tool import BaseTool, Tool, tool diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 9016c57fd..73a96f4cf 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -1,3 +1,4 @@ +from .brave_search_tool.brave_search_tool import BraveSearchTool from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool @@ -11,10 +12,10 @@ from .exa_tools.exa_search_tool import EXASearchTool from .file_read_tool.file_read_tool import 
FileReadTool from .file_writer_tool.file_writer_tool import FileWriterTool from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( - FirecrawlCrawlWebsiteTool + FirecrawlCrawlWebsiteTool, ) from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( - FirecrawlScrapeWebsiteTool + FirecrawlScrapeWebsiteTool, ) from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool from .github_search_tool.github_search_tool import GithubSearchTool @@ -22,16 +23,17 @@ from .json_search_tool.json_search_tool import JSONSearchTool from .llamaindex_tool.llamaindex_tool import LlamaIndexTool from .mdx_seach_tool.mdx_search_tool import MDXSearchTool from .multion_tool.multion_tool import MultiOnTool +from .mysql_search_tool.mysql_search_tool import MySQLSearchTool from .nl2sql.nl2sql_tool import NL2SQLTool from .pdf_search_tool.pdf_search_tool import PDFSearchTool from .pg_seach_tool.pg_search_tool import PGSearchTool from .rag.rag_tool import RagTool from .scrape_element_from_website.scrape_element_from_website import ( - ScrapeElementFromWebsiteTool + ScrapeElementFromWebsiteTool, ) from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ( - ScrapflyScrapeWebsiteTool + ScrapflyScrapeWebsiteTool, ) from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool from .serper_dev_tool.serper_dev_tool import SerperDevTool @@ -46,7 +48,6 @@ from .vision_tool.vision_tool import VisionTool from .website_search.website_search_tool import WebsiteSearchTool from .xml_search_tool.xml_search_tool import XMLSearchTool from .youtube_channel_search_tool.youtube_channel_search_tool import ( - YoutubeChannelSearchTool + YoutubeChannelSearchTool, ) from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool -from .mysql_search_tool.mysql_search_tool import MySQLSearchTool From d5fb31e645ccf0ebd7299442c6b099d6ca116e2c Mon 
Sep 17 00:00:00 2001 From: Brandon Hancock Date: Thu, 5 Dec 2024 13:16:48 -0500 Subject: [PATCH 16/18] update basetool dependencies to use root crewai repo --- README.md | 4 +- src/crewai_tools/__init__.py | 1 - src/crewai_tools/tools/base_tool.py | 59 ------------------- .../brave_search_tool/brave_search_tool.py | 3 +- .../browserbase_load_tool.py | 3 +- .../code_interpreter_tool.py | 3 +- .../tools/composio_tool/composio_tool.py | 3 +- .../tools/dalle_tool/dalle_tool.py | 3 +- .../directory_read_tool.py | 3 +- .../tools/exa_tools/exa_base_tool.py | 4 +- .../tools/exa_tools/exa_search_tool.py | 40 +++++++------ .../tools/file_read_tool/file_read_tool.py | 3 +- .../file_writer_tool/file_writer_tool.py | 16 ++--- .../firecrawl_crawl_website_tool.py | 12 ++-- .../firecrawl_scrape_website_tool.py | 10 ++-- .../firecrawl_search_tool.py | 3 +- .../jina_scrape_website_tool.py | 26 ++++---- .../tools/llamaindex_tool/llamaindex_tool.py | 3 +- .../tools/multion_tool/multion_tool.py | 2 +- src/crewai_tools/tools/nl2sql/nl2sql_tool.py | 6 +- src/crewai_tools/tools/rag/rag_tool.py | 3 +- .../scrape_element_from_website.py | 3 +- .../scrape_website_tool.py | 7 +-- .../scrapfly_scrape_website_tool.py | 3 +- .../selenium_scraping_tool.py | 4 +- .../tools/serper_dev_tool/serper_dev_tool.py | 3 +- .../serply_news_search_tool.py | 3 +- .../serply_scholar_search_tool.py | 3 +- .../serply_api_tool/serply_web_search_tool.py | 3 +- .../tools/spider_tool/spider_tool.py | 3 +- .../tools/vision_tool/vision_tool.py | 3 +- 31 files changed, 82 insertions(+), 163 deletions(-) delete mode 100644 src/crewai_tools/tools/base_tool.py diff --git a/README.md b/README.md index aca20f640..43cdc9b57 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ There are three ways to create tools for crewAI agents: ### Subclassing `BaseTool` ```python -from crewai_tools import BaseTool +from crewai.tools import BaseTool class MyCustomTool(BaseTool): name: str = "Name of my tool" @@ -70,7 +70,7 @@ Define a 
new class inheriting from `BaseTool`, specifying `name`, `description`, For a simpler approach, create a `Tool` object directly with the required attributes and a functional logic. ```python -from crewai_tools import tool +from crewai.tools import BaseTool @tool("Name of my tool") def my_tool(question: str) -> str: """Clear description for what this tool is useful for, you agent will need this information to use it.""" diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 5f9a81d9c..3fad09d9f 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -43,4 +43,3 @@ from .tools import ( YoutubeChannelSearchTool, YoutubeVideoSearchTool, ) -from .tools.base_tool import BaseTool, Tool, tool diff --git a/src/crewai_tools/tools/base_tool.py b/src/crewai_tools/tools/base_tool.py deleted file mode 100644 index 674e33030..000000000 --- a/src/crewai_tools/tools/base_tool.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import Any, Callable - -from pydantic import BaseModel as PydanticBaseModel - -from crewai.tools.base_tool import BaseTool -from crewai.tools.structured_tool import CrewStructuredTool - - -class Tool(BaseTool): - func: Callable - """The function that will be executed when the tool is called.""" - - def _run(self, *args: Any, **kwargs: Any) -> Any: - return self.func(*args, **kwargs) - - -def to_langchain( - tools: list[BaseTool | CrewStructuredTool], -) -> list[CrewStructuredTool]: - return [t.to_structured_tool() if isinstance(t, BaseTool) else t for t in tools] - - -def tool(*args): - """ - Decorator to create a tool from a function. 
- """ - - def _make_with_name(tool_name: str) -> Callable: - def _make_tool(f: Callable) -> BaseTool: - if f.__doc__ is None: - raise ValueError("Function must have a docstring") - if f.__annotations__ is None: - raise ValueError("Function must have type annotations") - - class_name = "".join(tool_name.split()).title() - args_schema = type( - class_name, - (PydanticBaseModel,), - { - "__annotations__": { - k: v for k, v in f.__annotations__.items() if k != "return" - }, - }, - ) - - return Tool( - name=tool_name, - description=f.__doc__, - func=f, - args_schema=args_schema, - ) - - return _make_tool - - if len(args) == 1 and callable(args[0]): - return _make_with_name(args[0].__name__)(args[0]) - if len(args) == 1 and isinstance(args[0], str): - return _make_with_name(args[0]) - raise ValueError("Invalid arguments") diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index dceff1d57..11035739d 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -4,10 +4,9 @@ import time from typing import Any, ClassVar, Optional, Type import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - def _save_results_to_file(content: str) -> None: """Saves the search results to a file.""" diff --git a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py index 514664557..54c33db3c 100644 --- a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py +++ b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class 
BrowserbaseLoadToolSchema(BaseModel): url: str = Field(description="Website URL") diff --git a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index a4488b35f..61c180fe3 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -3,10 +3,9 @@ import os from typing import List, Optional, Type import docker +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class CodeInterpreterSchema(BaseModel): """Input for CodeInterpreterTool.""" diff --git a/src/crewai_tools/tools/composio_tool/composio_tool.py b/src/crewai_tools/tools/composio_tool/composio_tool.py index 62068c0bd..4823441bf 100644 --- a/src/crewai_tools/tools/composio_tool/composio_tool.py +++ b/src/crewai_tools/tools/composio_tool/composio_tool.py @@ -5,8 +5,7 @@ Composio tools wrapper. 
import typing as t import typing_extensions as te - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool class ComposioTool(BaseTool): diff --git a/src/crewai_tools/tools/dalle_tool/dalle_tool.py b/src/crewai_tools/tools/dalle_tool/dalle_tool.py index da6adb2b1..7040de11a 100644 --- a/src/crewai_tools/tools/dalle_tool/dalle_tool.py +++ b/src/crewai_tools/tools/dalle_tool/dalle_tool.py @@ -1,11 +1,10 @@ import json from typing import Type +from crewai.tools import BaseTool from openai import OpenAI from pydantic import BaseModel -from crewai_tools.tools.base_tool import BaseTool - class ImagePromptSchema(BaseModel): """Input for Dall-E Tool.""" diff --git a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 3d308ba45..6033202be 100644 --- a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -1,10 +1,9 @@ import os from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedDirectoryReadToolSchema(BaseModel): """Input for DirectoryReadTool.""" diff --git a/src/crewai_tools/tools/exa_tools/exa_base_tool.py b/src/crewai_tools/tools/exa_tools/exa_base_tool.py index d2fe6217c..295b283ad 100644 --- a/src/crewai_tools/tools/exa_tools/exa_base_tool.py +++ b/src/crewai_tools/tools/exa_tools/exa_base_tool.py @@ -1,10 +1,8 @@ -import os from typing import Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class EXABaseToolToolSchema(BaseModel): """Input for EXABaseTool.""" diff --git a/src/crewai_tools/tools/exa_tools/exa_search_tool.py b/src/crewai_tools/tools/exa_tools/exa_search_tool.py index 30f77d1ee..6724c2417 100644 --- a/src/crewai_tools/tools/exa_tools/exa_search_tool.py +++ 
b/src/crewai_tools/tools/exa_tools/exa_search_tool.py @@ -1,28 +1,30 @@ import os -import requests from typing import Any +import requests + from .exa_base_tool import EXABaseTool + class EXASearchTool(EXABaseTool): - def _run( - self, - **kwargs: Any, - ) -> Any: - search_query = kwargs.get('search_query') - if search_query is None: - search_query = kwargs.get('query') + def _run( + self, + **kwargs: Any, + ) -> Any: + search_query = kwargs.get("search_query") + if search_query is None: + search_query = kwargs.get("query") - payload = { - "query": search_query, - "type": "magic", - } + payload = { + "query": search_query, + "type": "magic", + } - headers = self.headers.copy() - headers["x-api-key"] = os.environ['EXA_API_KEY'] + headers = self.headers.copy() + headers["x-api-key"] = os.environ["EXA_API_KEY"] - response = requests.post(self.search_url, json=payload, headers=headers) - results = response.json() - if 'results' in results: - results = super()._parse_results(results['results']) - return results + response = requests.post(self.search_url, json=payload, headers=headers) + results = response.json() + if "results" in results: + results = super()._parse_results(results["results"]) + return results diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 265dca54a..fe34c9d8b 100644 --- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedFileReadToolSchema(BaseModel): """Input for FileReadTool.""" diff --git a/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py b/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py index a008e4a75..ed454a1bd 100644 --- a/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py +++ 
b/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py @@ -1,16 +1,18 @@ import os -from typing import Any, Optional, Type -from pydantic import BaseModel -from ..base_tool import BaseTool from distutils.util import strtobool +from typing import Any, Optional, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel class FileWriterToolInput(BaseModel): - filename: str + filename: str directory: Optional[str] = "./" overwrite: str = "False" content: str - + + class FileWriterTool(BaseTool): name: str = "File Writer Tool" description: str = ( @@ -26,7 +28,7 @@ class FileWriterTool(BaseTool): # Construct the full path filepath = os.path.join(kwargs.get("directory") or "", kwargs["filename"]) - + # Convert overwrite to boolean kwargs["overwrite"] = bool(strtobool(kwargs["overwrite"])) @@ -46,4 +48,4 @@ class FileWriterTool(BaseTool): except KeyError as e: return f"An error occurred while accessing key: {str(e)}" except Exception as e: - return f"An error occurred while writing to the file: {str(e)}" \ No newline at end of file + return f"An error occurred while writing to the file: {str(e)}" diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 672656fc9..c23ff2100 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,8 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field, ConfigDict - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field # Type checking import if TYPE_CHECKING: @@ -21,9 +20,7 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): class FirecrawlCrawlWebsiteTool(BaseTool): model_config = ConfigDict( - 
arbitrary_types_allowed=True, - validate_assignment=True, - frozen=False + arbitrary_types_allowed=True, validate_assignment=True, frozen=False ) name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using Firecrawl and return the contents" @@ -59,10 +56,11 @@ class FirecrawlCrawlWebsiteTool(BaseTool): try: from firecrawl import FirecrawlApp + # Must rebuild model after class is defined FirecrawlCrawlWebsiteTool.model_rebuild() except ImportError: """ When this tool is not used, then exception can be ignored. """ - pass \ No newline at end of file + pass diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 84b61209b..9ab7d293e 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -1,8 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field, ConfigDict - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field # Type checking import if TYPE_CHECKING: @@ -25,9 +24,7 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteTool(BaseTool): model_config = ConfigDict( - arbitrary_types_allowed=True, - validate_assignment=True, - frozen=False + arbitrary_types_allowed=True, validate_assignment=True, frozen=False ) name: str = "Firecrawl web scrape tool" description: str = "Scrape webpages url using Firecrawl and return the contents" @@ -70,6 +67,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool): try: from firecrawl import FirecrawlApp + # Must rebuild model after class is defined FirecrawlScrapeWebsiteTool.model_rebuild() except ImportError: diff --git a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py 
b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index 13c3b82ee..5efd274de 100644 --- a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -1,9 +1,8 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - # Type checking import if TYPE_CHECKING: from firecrawl import FirecrawlApp diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index d887c085d..a10a4ffdb 100644 --- a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -1,17 +1,21 @@ +from typing import Optional, Type + import requests -from typing import Type, Optional -from ..base_tool import BaseTool +from crewai.tools import BaseTool from pydantic import BaseModel, Field class JinaScrapeWebsiteToolInput(BaseModel): """Input schema for JinaScrapeWebsiteTool.""" + website_url: str = Field(..., description="Mandatory website url to read the file") class JinaScrapeWebsiteTool(BaseTool): name: str = "JinaScrapeWebsiteTool" - description: str = "A tool that can be used to read a website content using Jina.ai reader and return markdown content." + description: str = ( + "A tool that can be used to read a website content using Jina.ai reader and return markdown content." 
+ ) args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput website_url: Optional[str] = None api_key: Optional[str] = None @@ -22,31 +26,29 @@ class JinaScrapeWebsiteTool(BaseTool): website_url: Optional[str] = None, api_key: Optional[str] = None, custom_headers: Optional[dict] = None, - **kwargs + **kwargs, ): super().__init__(**kwargs) if website_url is not None: self.website_url = website_url - self.description = ( - f"A tool that can be used to read {website_url}'s content and return markdown content." - ) + self.description = f"A tool that can be used to read {website_url}'s content and return markdown content." self._generate_description() if custom_headers is not None: self.headers = custom_headers - + if api_key is not None: self.headers["Authorization"] = f"Bearer {api_key}" def _run(self, website_url: Optional[str] = None) -> str: url = website_url or self.website_url if not url: - raise ValueError("Website URL must be provided either during initialization or execution") + raise ValueError( + "Website URL must be provided either during initialization or execution" + ) response = requests.get( - f"https://r.jina.ai/{url}", - headers=self.headers, - timeout=15 + f"https://r.jina.ai/{url}", headers=self.headers, timeout=15 ) response.raise_for_status() return response.text diff --git a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py index af5c93e1f..61a747956 100644 --- a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py +++ b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type, cast +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class LlamaIndexTool(BaseTool): """Tool to wrap LlamaIndex tools/query engines.""" diff --git a/src/crewai_tools/tools/multion_tool/multion_tool.py b/src/crewai_tools/tools/multion_tool/multion_tool.py index 
2dc944f23..a991074da 100644 --- a/src/crewai_tools/tools/multion_tool/multion_tool.py +++ b/src/crewai_tools/tools/multion_tool/multion_tool.py @@ -2,7 +2,7 @@ from typing import Any, Optional -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool class MultiOnTool(BaseTool): diff --git a/src/crewai_tools/tools/nl2sql/nl2sql_tool.py b/src/crewai_tools/tools/nl2sql/nl2sql_tool.py index 22c3a299b..786550ee7 100644 --- a/src/crewai_tools/tools/nl2sql/nl2sql_tool.py +++ b/src/crewai_tools/tools/nl2sql/nl2sql_tool.py @@ -1,11 +1,10 @@ -from typing import Any, Union +from typing import Any, Type, Union -from ..base_tool import BaseTool +from crewai.tools import BaseTool from pydantic import BaseModel, Field from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker -from typing import Type, Any class NL2SQLToolInput(BaseModel): sql_query: str = Field( @@ -13,6 +12,7 @@ class NL2SQLToolInput(BaseModel): description="The SQL query to execute.", ) + class NL2SQLTool(BaseTool): name: str = "NL2SQLTool" description: str = "Converts natural language to SQL queries and executes them." 
diff --git a/src/crewai_tools/tools/rag/rag_tool.py b/src/crewai_tools/tools/rag/rag_tool.py index 97291cd81..a9bbdab53 100644 --- a/src/crewai_tools/tools/rag/rag_tool.py +++ b/src/crewai_tools/tools/rag/rag_tool.py @@ -1,10 +1,9 @@ from abc import ABC, abstractmethod from typing import Any +from crewai.tools import BaseTool from pydantic import BaseModel, Field, model_validator -from crewai_tools.tools.base_tool import BaseTool - class Adapter(BaseModel, ABC): class Config: diff --git a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index 56bb27195..14757d247 100644 --- a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type import requests from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedScrapeElementFromWebsiteToolSchema(BaseModel): """Input for ScrapeElementFromWebsiteTool.""" diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 99df1d2dd..8cfc5d136 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -4,10 +4,9 @@ from typing import Any, Optional, Type import requests from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedScrapeWebsiteToolSchema(BaseModel): """Input for ScrapeWebsiteTool.""" @@ -69,6 +68,6 @@ class ScrapeWebsiteTool(BaseTool): parsed = BeautifulSoup(page.text, "html.parser") text = parsed.get_text(" ") - text = re.sub('[ \t]+', ' ', text) - text = re.sub('\\s+\n\\s+', 
'\n', text) + text = re.sub("[ \t]+", " ", text) + text = re.sub("\\s+\n\\s+", "\n", text) return text diff --git a/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index 5800e223c..b47ce8e5b 100644 --- a/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -1,10 +1,9 @@ import logging from typing import Any, Dict, Literal, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - logger = logging.getLogger(__file__) diff --git a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py index 970cde7ca..47910f35b 100644 --- a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py +++ b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py @@ -1,14 +1,12 @@ import time from typing import Any, Optional, Type -from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By -from ..base_tool import BaseTool - class FixedSeleniumScrapingToolSchema(BaseModel): """Input for SeleniumScrapingTool.""" diff --git a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py index ca118326e..8f53ce0a4 100644 --- a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py +++ b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -4,10 +4,9 @@ import os from typing import Any, Optional, Type import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from 
crewai_tools.tools.base_tool import BaseTool - def _save_results_to_file(content: str) -> None: """Saves the search results to a file.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py index 21e6e9872..c058091a2 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SerplyNewsSearchToolSchema(BaseModel): """Input for Serply News Search.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py index 1ac6337f6..3ed9de4ab 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SerplyScholarSearchToolSchema(BaseModel): """Input for Serply Scholar Search.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py index b65fa21d1..b4d1ae4b5 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class 
SerplyWebSearchToolSchema(BaseModel): """Input for Serply Web Search.""" diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py b/src/crewai_tools/tools/spider_tool/spider_tool.py index c01b5e2a3..94da9f6fe 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -1,9 +1,8 @@ from typing import Any, Dict, Literal, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SpiderToolSchema(BaseModel): url: str = Field(description="Website URL") diff --git a/src/crewai_tools/tools/vision_tool/vision_tool.py b/src/crewai_tools/tools/vision_tool/vision_tool.py index 6b7a21dbd..3ac3c3ae5 100644 --- a/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -2,11 +2,10 @@ import base64 from typing import Type import requests +from crewai.tools import BaseTool from openai import OpenAI from pydantic import BaseModel -from crewai_tools.tools.base_tool import BaseTool - class ImagePromptSchema(BaseModel): """Input for Vision Tool.""" From a0e0c2815273efe75760b4b83519f2d44500f916 Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Sun, 8 Dec 2024 21:44:19 -0800 Subject: [PATCH 17/18] setup weaviate vector search tool --- src/crewai_tools/__init__.py | 1 + src/crewai_tools/tools/__init__.py | 1 + .../tools/weaviate_tool/README.md | 80 +++++++++++++++++ .../tools/weaviate_tool/vector_search.py | 89 +++++++++++++++++++ 4 files changed, 171 insertions(+) create mode 100644 src/crewai_tools/tools/weaviate_tool/README.md create mode 100644 src/crewai_tools/tools/weaviate_tool/vector_search.py diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 3fad09d9f..12523a214 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -42,4 +42,5 @@ from .tools import ( XMLSearchTool, YoutubeChannelSearchTool, YoutubeVideoSearchTool, + 
WeaviateVectorSearchTool, ) diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 73a96f4cf..23565dbea 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -51,3 +51,4 @@ from .youtube_channel_search_tool.youtube_channel_search_tool import ( YoutubeChannelSearchTool, ) from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool +from .weaviate_tool.vector_search import WeaviateVectorSearchTool diff --git a/src/crewai_tools/tools/weaviate_tool/README.md b/src/crewai_tools/tools/weaviate_tool/README.md new file mode 100644 index 000000000..42daa40e0 --- /dev/null +++ b/src/crewai_tools/tools/weaviate_tool/README.md @@ -0,0 +1,80 @@ +# WeaviateVectorSearchTool + +## Description +This tool is specifically crafted for conducting semantic searches within docs within a Weaviate vector database. Use this tool to find semantically similar docs to a given query. + +Weaviate is a vector database that is used to store and query vector embeddings. 
You can follow their docs here: https://weaviate.io/developers/wcs/connect + +## Installation +Install the crewai_tools package by executing the following command in your terminal: + +```shell +uv pip install 'crewai[tools]' +``` + +## Example +To utilize the WeaviateVectorSearchTool for different use cases, follow these examples: + +```python +from crewai_tools import WeaviateVectorSearchTool + +# To enable the tool to search any website the agent comes across or learns about during its operation +tool = WeaviateVectorSearchTool( + collection_name='example_collections', + limit=3, + weaviate_cluster_url="https://your-weaviate-cluster-url.com", + weaviate_api_key="your-weaviate-api-key", +) + +# or + +# Setup custom model for vectorizer and generative model +tool = WeaviateVectorSearchTool( + collection_name='example_collections', + limit=3, + vectorizer=Configure.Vectorizer.text2vec_openai(model="nomic-embed-text"), + generative_model=Configure.Generative.openai(model="gpt-4o-mini"), + weaviate_cluster_url="https://your-weaviate-cluster-url.com", + weaviate_api_key="your-weaviate-api-key", +) + +# Adding the tool to an agent +rag_agent = Agent( + name="rag_agent", + role="You are a helpful assistant that can answer questions with the help of the WeaviateVectorSearchTool.", + llm="gpt-4o-mini", + tools=[tool], +) +``` + +## Arguments +- `collection_name` : The name of the collection to search within. (Required) +- `weaviate_cluster_url` : The URL of the Weaviate cluster. (Required) +- `weaviate_api_key` : The API key for the Weaviate cluster. (Required) +- `limit` : The number of results to return. (Optional) +- `vectorizer` : The vectorizer to use. (Optional) +- `generative_model` : The generative model to use. (Optional) + +Preloading the Weaviate database with documents: + +```python +from crewai_tools import WeaviateVectorSearchTool + +# Use before hooks to generate the documents and add them to the Weaviate database. 
Follow the weaviate docs: https://weaviate.io/developers/wcs/connect +test_docs = client.collections.get("test_collection_name") + + +docs_to_load = os.listdir("knowledge") +with test_docs.batch.dynamic() as batch: + for d in docs_to_load: + with open(os.path.join("knowledge", d), "r") as f: + content = f.read() + batch.add_object( + { + "content": content, + "year": d.split("_")[0], + } + ) +tool = WeaviateVectorSearchTool(collection_name='example_collections', limit=3) + +``` diff --git a/src/crewai_tools/tools/weaviate_tool/vector_search.py b/src/crewai_tools/tools/weaviate_tool/vector_search.py new file mode 100644 index 000000000..ab80b6ce1 --- /dev/null +++ b/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -0,0 +1,89 @@ +import os +import json +import weaviate +from pydantic import BaseModel, Field +from typing import Type, Optional +from crewai.tools import BaseTool + +from weaviate.classes.config import Configure, Vectorizers +from weaviate.classes.init import Auth + + +class WeaviateToolSchema(BaseModel): + """Input for WeaviateTool.""" + + query: str = Field( + ..., + description="The query to search retrieve relevant information from the Weaviate database. Pass only the query, not the question.", + ) + + +class WeaviateVectorSearchTool(BaseTool): + """Tool to search the Weaviate database""" + + name: str = "WeaviateVectorSearchTool" + description: str = "A tool to search the Weaviate database for relevant information on internal documents." 
+ args_schema: Type[BaseModel] = WeaviateToolSchema + query: Optional[str] = None + + vectorizer: Optional[Vectorizers] = Field( + default=Configure.Vectorizer.text2vec_openai( + model="nomic-embed-text", + ) + ) + generative_model: Optional[str] = Field( + default=Configure.Generative.openai( + model="gpt-4o", + ), + ) + collection_name: Optional[str] = None + limit: Optional[int] = Field(default=3) + headers: Optional[dict] = Field( + default={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]} + ) + weaviate_cluster_url: str = Field( + ..., + description="The URL of the Weaviate cluster", + ) + weaviate_api_key: str = Field( + ..., + description="The API key for the Weaviate cluster", + ) + + def _run(self, query: str) -> str: + """Search the Weaviate database + + Args: + query (str): The query to search retrieve relevant information from the Weaviate database. Pass only the query as a string, not the question. + + Returns: + str: The result of the search query + """ + + if not self.weaviate_cluster_url or not self.weaviate_api_key: + raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set") + + client = weaviate.connect_to_weaviate_cloud( + cluster_url=self.weaviate_cluster_url, + auth_credentials=Auth.api_key(self.weaviate_api_key), + headers=self.headers, + ) + internal_docs = client.collections.get(self.collection_name) + + if not internal_docs: + internal_docs = client.collections.create( + name=self.collection_name, + vectorizer_config=self.vectorizer, + generative_config=self.generative_model, + ) + + response = internal_docs.query.near_text( + query=query, + limit=self.limit, + ) + json_response = "" + for obj in response.objects: + json_response += json.dumps(obj.properties, indent=2) + + client.close() + return json_response From d5d83cbd7eea3cebac36fbdaa027229ec875e2bb Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Sun, 8 Dec 2024 21:48:15 -0800 Subject: [PATCH 18/18] fix collection name docs --- src/crewai_tools/tools/weaviate_tool/README.md | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/weaviate_tool/README.md b/src/crewai_tools/tools/weaviate_tool/README.md index 42daa40e0..c48f2f70a 100644 --- a/src/crewai_tools/tools/weaviate_tool/README.md +++ b/src/crewai_tools/tools/weaviate_tool/README.md @@ -61,7 +61,7 @@ Preloading the Weaviate database with documents: from crewai_tools import WeaviateVectorSearchTool # Use before hooks to generate the documents and add them to the Weaviate database. Follow the weaviate docs: https://weaviate.io/developers/wcs/connect -test_docs = client.collections.get("test_collection_name") +test_docs = client.collections.get("example_collections") docs_to_load = os.listdir("knowledge")