From 96e52767ad417738c684e6ff7470ce25a458634e Mon Sep 17 00:00:00 2001 From: Ernest Poletaev Date: Fri, 25 Oct 2024 22:03:59 +0700 Subject: [PATCH 01/69] fix: web scraper concatenate words --- .../tools/scrape_website_tool/scrape_website_tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 7173c2156..3cfb67bae 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -1,4 +1,5 @@ import os +import re from typing import Any, Optional, Type import requests @@ -67,7 +68,6 @@ class ScrapeWebsiteTool(BaseTool): page.encoding = page.apparent_encoding parsed = BeautifulSoup(page.text, "html.parser") - text = parsed.get_text() - text = "\n".join([i for i in text.split("\n") if i.strip() != ""]) - text = " ".join([i for i in text.split(" ") if i.strip() != ""]) + text = parsed.get_text(" ") + text = re.sub('\s+', ' ', text) return text From 1f8791953e41194fe0c34761076096824c844bf8 Mon Sep 17 00:00:00 2001 From: Ernest Poletaev Date: Fri, 25 Oct 2024 22:33:24 +0700 Subject: [PATCH 02/69] fix: retain line breaks --- .../tools/scrape_website_tool/scrape_website_tool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 3cfb67bae..99df1d2dd 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -69,5 +69,6 @@ class ScrapeWebsiteTool(BaseTool): parsed = BeautifulSoup(page.text, "html.parser") text = parsed.get_text(" ") - text = re.sub('\s+', ' ', text) + text = re.sub('[ \t]+', ' ', text) + text = re.sub('\\s+\n\\s+', '\n', text) return text From 1c37158208552f3ec84d4a8f269643c3a7eeb61c Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Wed, 20 Nov 2024 18:58:58 +0800 Subject: [PATCH 03/69] fix: correct variable name typo in exa_base_tool --- src/crewai_tools/tools/exa_tools/exa_base_tool.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/crewai_tools/tools/exa_tools/exa_base_tool.py b/src/crewai_tools/tools/exa_tools/exa_base_tool.py index 6273c5f7a..d2fe6217c 100644 --- a/src/crewai_tools/tools/exa_tools/exa_base_tool.py +++ b/src/crewai_tools/tools/exa_tools/exa_base_tool.py @@ -28,10 +28,10 @@ class EXABaseTool(BaseTool): } def _parse_results(self, results): - stirng = [] + string = [] for result in results: try: - stirng.append( + string.append( "\n".join( [ f"Title: {result['title']}", @@ -43,7 +43,7 @@ class EXABaseTool(BaseTool): ) ) except KeyError: - next + continue - content = "\n".join(stirng) + content = "\n".join(string) return f"\nSearch results: {content}\n" From 15970734e3690c81198ea238543d6a08000dd242 Mon Sep 17 00:00:00 2001 From: Mike Plachta Date: Wed, 27 Nov 2024 17:52:56 -0800 Subject: [PATCH 04/69] Jina Website Scraper v1 --- .../tools/jina_scrape_website_tool/README.md | 38 ++++++++++++++ .../jina_scrape_website_tool.py | 52 +++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 src/crewai_tools/tools/jina_scrape_website_tool/README.md create mode 100644 src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py diff --git 
a/src/crewai_tools/tools/jina_scrape_website_tool/README.md b/src/crewai_tools/tools/jina_scrape_website_tool/README.md new file mode 100644 index 000000000..0278e5aa0 --- /dev/null +++ b/src/crewai_tools/tools/jina_scrape_website_tool/README.md @@ -0,0 +1,38 @@ +# JinaScrapeWebsiteTool + +## Description +A tool designed to extract and read the content of a specified website by using Jina.ai reader. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites. + +## Installation +Install the crewai_tools package +```shell +pip install 'crewai[tools]' +``` + +## Example +```python +from crewai_tools import JinaScrapeWebsiteTool + +# To enable scraping any website it finds during its execution +tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY') + +# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website +tool = JinaScrapeWebsiteTool(website_url='https://www.example.com') + +# With custom headers +tool = JinaScrapeWebsiteTool( + website_url='https://www.example.com', + custom_headers={'X-Target-Selector': 'body, .class, #id'} +) +``` + +## Authentication +The tool uses Jina.ai's reader service. While it can work without an API key, Jina.ai may apply rate limiting or blocking to unauthenticated requests. For production use, it's recommended to provide an API key. + +## Arguments +- `website_url`: Mandatory website URL to read the file. This is the primary input for the tool, specifying which website's content should be scraped and read. +- `api_key`: Optional Jina.ai API key for authenticated access to the reader service. +- `custom_headers`: Optional dictionary of HTTP headers to use when making requests. + +## Note +This tool is an alternative to the standard `ScrapeWebsiteTool` that specifically uses Jina.ai's reader service for enhanced content extraction. Choose this tool when you need more sophisticated content parsing capabilities. \ No newline at end of file diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py new file mode 100644 index 000000000..7fec77938 --- /dev/null +++ b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -0,0 +1,52 @@ +import requests +from typing import Type, Optional +from crewai_tools import BaseTool +from pydantic import BaseModel, Field + + +class JinaScrapeWebsiteToolInput(BaseModel): + """Input schema for JinaScrapeWebsiteTool.""" + website_url: str = Field(..., description="Mandatory website url to read the file") + + +class JinaScrapeWebsiteTool(BaseTool): + name: str = "JinaScrapeWebsiteTool" + description: str = "A tool that can be used to read a website content using Jina.ai reader and return markdown content." + args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput + website_url: Optional[str] = None + api_key: Optional[str] = None + headers: dict = {} + + def __init__( + self, + website_url: Optional[str] = None, + api_key: Optional[str] = None, + custom_headers: Optional[dict] = None, + **kwargs + ): + super().__init__(**kwargs) + if website_url is not None: + self.website_url = website_url + self.description = ( + f"A tool that can be used to read {website_url}'s content and return markdown content." 
+ ) + self._generate_description() + + if custom_headers is not None: + self.headers = custom_headers + + if api_key is not None: + self.headers["Authorization"] = f"Bearer {api_key}" + + def _run(self, website_url: Optional[str] = None) -> str: + url = website_url or self.website_url + if not url: + raise ValueError("Website URL must be provided either during initialization or execution") + + response = requests.get( + f"https://r.jina.ai/{url}", + headers=self.headers, + timeout=15 + ) + response.raise_for_status() + return response.text From cbec6d5cd797f36088ab6f3c4f8f8957c79d68b1 Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:15:18 +0800 Subject: [PATCH 05/69] docs: fix API key reference in MultiOnTool README --- src/crewai_tools/tools/multion_tool/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/crewai_tools/tools/multion_tool/README.md b/src/crewai_tools/tools/multion_tool/README.md index ea530037f..da92a0682 100644 --- a/src/crewai_tools/tools/multion_tool/README.md +++ b/src/crewai_tools/tools/multion_tool/README.md @@ -41,7 +41,7 @@ crew.kickoff() ## Arguments -- `api_key`: Specifies Browserbase API key. Defaults is the `BROWSERBASE_API_KEY` environment variable. +- `api_key`: Specifies MultiOn API key. Default is the `MULTION_API_KEY` environment variable. - `local`: Use the local flag set as "true" to run the agent locally on your browser. Make sure the multion browser extension is installed and API Enabled is checked. - `max_steps`: Optional. Set the max_steps the multion agent can take for a command @@ -51,4 +51,3 @@ To effectively use the `MultiOnTool`, follow these steps: 1. **Install CrewAI**: Confirm that the `crewai[tools]` package is installed in your Python environment. 2. **Install and use MultiOn**: Follow MultiOn documentation for installing the MultiOn Browser Extension (https://docs.multion.ai/learn/browser-extension). 3. **Enable API Usage**: Click on the MultiOn extension in the extensions folder of your browser (not the hovering MultiOn icon on the web page) to open the extension configurations. 
Click the API Enabled toggle to enable the API - From dd18c59a9bbb2d73842f041ee7c0717f76658d3d Mon Sep 17 00:00:00 2001 From: Mike Plachta Date: Wed, 27 Nov 2024 19:57:52 -0800 Subject: [PATCH 06/69] Update jina_scrape_website_tool.py --- .../tools/jina_scrape_website_tool/jina_scrape_website_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index 7fec77938..d887c085d 100644 --- a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -1,6 +1,6 @@ import requests from typing import Type, Optional -from crewai_tools import BaseTool +from ..base_tool import BaseTool from pydantic import BaseModel, Field From a94470772fbfad0e538858b8d245c89cfab1dde8 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 28 Nov 2024 19:09:44 -0500 Subject: [PATCH 07/69] Fix pydantic related errors on FirecrawlScrapeWebsiteTool --- .../firecrawl_scrape_website_tool.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index ee8e592ca..89478976d 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from crewai_tools.tools.base_tool import BaseTool @@ -24,6 +24,7 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteTool(BaseTool): + model_config = ConfigDict(arbitrary_types_allowed=True) name: str = "Firecrawl web scrape tool" description: str = "Scrape webpages url using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema @@ -61,3 +62,11 @@ class FirecrawlScrapeWebsiteTool(BaseTool): "timeout": timeout, } return self.firecrawl.scrape_url(url, options) + +try: + from firecrawl import FirecrawlApp + FirecrawlScrapeWebsiteTool.model_rebuild() +except ImportError: + raise ImportError( + "`firecrawl` package not found, please run `pip install firecrawl-py`" + ) \ No newline at end of file From 49ad43ff083c924e3ed510b84999efd3ba0144c8 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 29 Nov 2024 05:51:55 -0500 Subject: [PATCH 08/69] Do not reraise exception Tool use is optional and missing dependency should not raise error --- .../firecrawl_scrape_website_tool.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 89478976d..cb00a46c6 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -65,8 +65,10 @@ class FirecrawlScrapeWebsiteTool(BaseTool): try: from firecrawl import FirecrawlApp + # Must rebuild model after class is defined FirecrawlScrapeWebsiteTool.model_rebuild() except ImportError: - raise ImportError( - "`firecrawl` 
package not found, please run `pip install firecrawl-py`" - ) \ No newline at end of file + """ + When this tool is not used, then exception can be ignored. + """ + pass \ No newline at end of file From e677a271e53afa265a55f5ad77f85b4cceba9ee4 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 29 Nov 2024 05:57:09 -0500 Subject: [PATCH 09/69] More explicit model config --- .../firecrawl_scrape_website_tool.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index cb00a46c6..84b61209b 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -24,7 +24,11 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteTool(BaseTool): - model_config = ConfigDict(arbitrary_types_allowed=True) + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + frozen=False + ) name: str = "Firecrawl web scrape tool" description: str = "Scrape webpages url using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema @@ -63,6 +67,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool): } return self.firecrawl.scrape_url(url, options) + try: from firecrawl import FirecrawlApp # Must rebuild model after class is defined @@ -71,4 +76,4 @@ except ImportError: """ When this tool is not used, then exception can be ignored. """ - pass \ No newline at end of file + pass From ec9951e28a8ce5eb66fcabf33b93cc84779b9a2d Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Sat, 30 Nov 2024 00:34:04 +0800 Subject: [PATCH 10/69] docs: improve CodeDocsSearchTool README - Fix tool name in custom model example --- src/crewai_tools/tools/code_docs_search_tool/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/code_docs_search_tool/README.md b/src/crewai_tools/tools/code_docs_search_tool/README.md index 879461427..f90398a11 100644 --- a/src/crewai_tools/tools/code_docs_search_tool/README.md +++ b/src/crewai_tools/tools/code_docs_search_tool/README.md @@ -32,7 +32,7 @@ Note: Substitute 'https://docs.example.com/reference' with your target documenta By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: ```python -tool = YoutubeVideoSearchTool( +tool = CodeDocsSearchTool( config=dict( llm=dict( provider="ollama", # or google, openai, anthropic, llama2, ... From eed6a38ea4060e44d55a072cef4a47dfcb0ff266 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 29 Nov 2024 11:41:47 -0500 Subject: [PATCH 11/69] Fix pydantic related errors. 
--- .../firecrawl_crawl_website_tool.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index a535b6c63..672656fc9 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from crewai_tools.tools.base_tool import BaseTool @@ -20,6 +20,11 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): class FirecrawlCrawlWebsiteTool(BaseTool): + model_config = ConfigDict( + arbitrary_types_allowed=True, + validate_assignment=True, + frozen=False + ) name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema @@ -50,3 +55,14 @@ class FirecrawlCrawlWebsiteTool(BaseTool): options = {"crawlerOptions": crawler_options, "pageOptions": page_options} return self.firecrawl.crawl_url(url, options) + + +try: + from firecrawl import FirecrawlApp + # Must rebuild model after class is defined + FirecrawlCrawlWebsiteTool.model_rebuild() +except ImportError: + """ + When this tool is not used, then exception can be ignored. + """ + pass \ No newline at end of file From 945ed7aaaa3962e5412e8b5476a476b39c0c36d9 Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Sat, 30 Nov 2024 10:52:57 +0800 Subject: [PATCH 12/69] docs: fix typos and formatting in NL2SQL tool README --- src/crewai_tools/tools/nl2sql/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/crewai_tools/tools/nl2sql/README.md b/src/crewai_tools/tools/nl2sql/README.md index d0bb82271..932867c90 100644 --- a/src/crewai_tools/tools/nl2sql/README.md +++ b/src/crewai_tools/tools/nl2sql/README.md @@ -2,9 +2,9 @@ ## Description -This tool is used to convert natural language to SQL queries. When passsed to the agent it will generate queries and then use them to interact with the database. +This tool is used to convert natural language to SQL queries. When passed to the agent it will generate queries and then use them to interact with the database. -This enables multiple workflows like having an Agent to access the database fetch information based on the goal and then use the information to generate a response, report or any other output. Along with that proivdes the ability for the Agent to update the database based on its goal. +This enables multiple workflows like having an Agent to access the database fetch information based on the goal and then use the information to generate a response, report or any other output. Along with that provides the ability for the Agent to update the database based on its goal. **Attention**: Make sure that the Agent has access to a Read-Replica or that is okay for the Agent to run insert/update queries on the database. @@ -23,7 +23,6 @@ pip install 'crewai[tools]' In order to use the NL2SQLTool, you need to pass the database URI to the tool. The URI should be in the format `dialect+driver://username:password@host:port/database`. 
- ```python from crewai_tools import NL2SQLTool @@ -43,7 +42,7 @@ def researcher(self) -> Agent: The primary task goal was: -"Retrieve the average, maximum, and minimum monthly revenue for each city, but only include cities that have more than one user. Also, count the number of user in each city and sort the results by the average monthly revenue in descending order" +"Retrieve the average, maximum, and minimum monthly revenue for each city, but only include cities that have more than one user. Also, count the number of users in each city and sort the results by the average monthly revenue in descending order" So the Agent tried to get information from the DB, the first one is wrong so the Agent tries again and gets the correct information and passes to the next agent. From 6c242ef3bbfe722d4159e60c931d20e2a38a0570 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 14:04:06 -0600 Subject: [PATCH 13/69] add brave search tool --- .../tools/brave_search_tool/README.md | 30 +++++++ .../brave_search_tool/brave_search_tool.py | 82 +++++++++++++++++++ tests/tools/brave_search_tool_test.py | 13 +++ 3 files changed, 125 insertions(+) create mode 100644 src/crewai_tools/tools/brave_search_tool/README.md create mode 100644 src/crewai_tools/tools/brave_search_tool/brave_search_tool.py create mode 100644 tests/tools/brave_search_tool_test.py diff --git a/src/crewai_tools/tools/brave_search_tool/README.md b/src/crewai_tools/tools/brave_search_tool/README.md new file mode 100644 index 000000000..a66210491 --- /dev/null +++ b/src/crewai_tools/tools/brave_search_tool/README.md @@ -0,0 +1,30 @@ +# BraveSearchTool Documentation + +## Description +This tool is designed to perform a web search for a specified query from a text's content across the internet. It utilizes the Brave Web Search API, which is a REST API to query Brave Search and get back search results from the web. The following sections describe how to curate requests, including parameters and headers, to Brave Web Search API and get a JSON response back. + +## Installation +To incorporate this tool into your project, follow the installation instructions below: +```shell +pip install 'crewai[tools]' +``` + +## Example +The following example demonstrates how to initialize the tool and execute a search with a given query: + +```python +from crewai_tools import BraveSearchTool + +# Initialize the tool for internet searching capabilities +tool = BraveSearchTool() +``` + +## Steps to Get Started +To effectively use the `BraveSearchTool`, follow these steps: + +1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment. +2. **API Key Acquisition**: Acquire a API key [here](https://api.search.brave.com/app/keys). +3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool. + +## Conclusion +By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward. 
diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py new file mode 100644 index 000000000..54f546f1e --- /dev/null +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -0,0 +1,82 @@ +import datetime +import os +from typing import Any, Optional, Type + +import requests +from pydantic import BaseModel, Field + +from crewai_tools.tools.base_tool import BaseTool + + +def _save_results_to_file(content: str) -> None: + """Saves the search results to a file.""" + filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt" + with open(filename, "w") as file: + file.write(content) + print(f"Results saved to {filename}") + + +class BraveSearchToolSchema(BaseModel): + """Input for BraveSearchTool.""" + + search_query: str = Field( + ..., description="Mandatory search query you want to use to search the internet" + ) + + +class BraveSearchTool(BaseTool): + name: str = "Search the internet" + description: str = ( + "A tool that can be used to search the internet with a search_query." + ) + args_schema: Type[BaseModel] = BraveSearchToolSchema + search_url: str = "https://api.search.brave.com/res/v1/web/search" + country: Optional[str] = "" + n_results: int = 10 + save_file: bool = False + + def _run( + self, + **kwargs: Any, + ) -> Any: + search_query = kwargs.get("search_query") or kwargs.get("query") + save_file = kwargs.get("save_file", self.save_file) + n_results = kwargs.get("n_results", self.n_results) + + payload = {"q": search_query, "count": n_results} + + if self.country != "": + payload["country"] = self.country + + headers = { + "X-Subscription-Token": os.environ["BRAVE_API_KEY"], + "Accept": "application/json", + } + + response = requests.get(self.search_url, headers=headers, params=payload) + results = response.json() + + if "web" in results: + results = results["web"]["results"] + string = [] + for result in results: + try: + string.append( + "\n".join( + [ + f"Title: {result['title']}", + f"Link: {result['url']}", + f"Snippet: {result['description']}", + "---", + ] + ) + ) + except KeyError: + continue + + content = "\n".join(string) + if save_file: + _save_results_to_file(content) + return f"\nSearch results: {content}\n" + else: + return results diff --git a/tests/tools/brave_search_tool_test.py b/tests/tools/brave_search_tool_test.py new file mode 100644 index 000000000..16c1bcb92 --- /dev/null +++ b/tests/tools/brave_search_tool_test.py @@ -0,0 +1,13 @@ +from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool + + +def test_brave_tool(): + tool = BraveSearchTool( + n_results=2, + ) + + print(tool.run(search_query="ChatGPT")) + + +if __name__ == "__main__": + test_brave_tool() From d168b8e24554e37a706d0af18c4b82af483fd442 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 21:36:28 -0600 Subject: [PATCH 14/69] add error handling --- .../tools/brave_search_tool/__init__.py | 0 .../brave_search_tool/brave_search_tool.py | 90 ++++++++++++------- tests/tools/brave_search_tool_test.py | 37 ++++++++ 3 files changed, 96 insertions(+), 31 deletions(-) create mode 100644 src/crewai_tools/tools/brave_search_tool/__init__.py diff --git a/src/crewai_tools/tools/brave_search_tool/__init__.py b/src/crewai_tools/tools/brave_search_tool/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 
54f546f1e..6a8818d75 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -25,6 +25,18 @@ class BraveSearchToolSchema(BaseModel): class BraveSearchTool(BaseTool): + """ + BraveSearchTool - A tool for performing web searches using the Brave Search API. + + This module provides functionality to search the internet using Brave's Search API, + supporting customizable result counts and country-specific searches. + + Dependencies: + - requests + - pydantic + - python-dotenv (for API key management) + """ + name: str = "Search the internet" description: str = ( "A tool that can be used to search the internet with a search_query." @@ -35,48 +47,64 @@ class BraveSearchTool(BaseTool): n_results: int = 10 save_file: bool = False + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if "BRAVE_API_KEY" not in os.environ: + raise ValueError( + "BRAVE_API_KEY environment variable is required for BraveSearchTool" + ) + def _run( self, **kwargs: Any, ) -> Any: - search_query = kwargs.get("search_query") or kwargs.get("query") - save_file = kwargs.get("save_file", self.save_file) - n_results = kwargs.get("n_results", self.n_results) + try: + search_query = kwargs.get("search_query") or kwargs.get("query") + if not search_query: + raise ValueError("Search query is required") - payload = {"q": search_query, "count": n_results} + save_file = kwargs.get("save_file", self.save_file) + n_results = kwargs.get("n_results", self.n_results) - if self.country != "": - payload["country"] = self.country + payload = {"q": search_query, "count": n_results} - headers = { - "X-Subscription-Token": os.environ["BRAVE_API_KEY"], - "Accept": "application/json", - } + if self.country != "": + payload["country"] = self.country - response = requests.get(self.search_url, headers=headers, params=payload) - results = response.json() + headers = { + "X-Subscription-Token": os.environ["BRAVE_API_KEY"], + "Accept": "application/json", + } - if "web" in results: - results = results["web"]["results"] - string = [] - for result in results: - try: - string.append( - "\n".join( - [ - f"Title: {result['title']}", - f"Link: {result['url']}", - f"Snippet: {result['description']}", - "---", - ] + response = requests.get(self.search_url, headers=headers, params=payload) + response.raise_for_status() # Handle non-200 responses + results = response.json() + + if "web" in results: + results = results["web"]["results"] + string = [] + for result in results: + try: + string.append( + "\n".join( + [ + f"Title: {result['title']}", + f"Link: {result['url']}", + f"Snippet: {result['description']}", + "---", + ] + ) ) - ) - except KeyError: - continue + except KeyError: + continue content = "\n".join(string) - if save_file: - _save_results_to_file(content) + except requests.RequestException as e: + return f"Error performing search: {str(e)}" + except KeyError as e: + return f"Error parsing search results: {str(e)}" + if save_file: + _save_results_to_file(content) return f"\nSearch results: {content}\n" else: - return results + return content diff --git a/tests/tools/brave_search_tool_test.py b/tests/tools/brave_search_tool_test.py index 16c1bcb92..969bd48fe 100644 --- a/tests/tools/brave_search_tool_test.py +++ b/tests/tools/brave_search_tool_test.py @@ -1,6 +1,41 @@ +from unittest.mock import patch + +import pytest + from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool +@pytest.fixture +def brave_tool(): + 
return BraveSearchTool(n_results=2) + + +def test_brave_tool_initialization(): + tool = BraveSearchTool() + assert tool.n_results == 10 + assert tool.save_file is False + + +@patch("requests.get") +def test_brave_tool_search(mock_get, brave_tool): + mock_response = { + "web": { + "results": [ + { + "title": "Test Title", + "url": "http://test.com", + "description": "Test Description", + } + ] + } + } + mock_get.return_value.json.return_value = mock_response + + result = brave_tool.run(search_query="test") + assert "Test Title" in result + assert "http://test.com" in result + + def test_brave_tool(): tool = BraveSearchTool( n_results=2, @@ -11,3 +46,5 @@ def test_brave_tool(): if __name__ == "__main__": test_brave_tool() + test_brave_tool_initialization() + # test_brave_tool_search(brave_tool) From 5532ea8ff72993860b85326d7299351a0b23c3b5 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 21:51:46 -0600 Subject: [PATCH 15/69] add lru caching --- src/crewai_tools/tools/brave_search_tool/brave_search_tool.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 6a8818d75..5ff451484 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -1,5 +1,6 @@ import datetime import os +from functools import lru_cache from typing import Any, Optional, Type import requests @@ -54,6 +55,7 @@ class BraveSearchTool(BaseTool): "BRAVE_API_KEY environment variable is required for BraveSearchTool" ) + @lru_cache(maxsize=100) def _run( self, **kwargs: Any, From e7e059d02a4fa09f2b13873643a4ce38c4c45dc2 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 22:08:29 -0600 Subject: [PATCH 16/69] add rate limiting --- .../tools/brave_search_tool/brave_search_tool.py | 13 ++++++++++--- tests/tools/brave_search_tool_test.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 5ff451484..8d6a9a182 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -1,7 +1,7 @@ import datetime import os -from functools import lru_cache -from typing import Any, Optional, Type +import time +from typing import Any, ClassVar, Optional, Type import requests from pydantic import BaseModel, Field @@ -47,6 +47,8 @@ class BraveSearchTool(BaseTool): country: Optional[str] = "" n_results: int = 10 save_file: bool = False + _last_request_time: ClassVar[float] = 0 + _min_request_interval: ClassVar[float] = 1.0 # seconds def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -55,11 +57,16 @@ class BraveSearchTool(BaseTool): "BRAVE_API_KEY environment variable is required for BraveSearchTool" ) - @lru_cache(maxsize=100) def _run( self, **kwargs: Any, ) -> Any: + current_time = time.time() + if (current_time - self._last_request_time) < self._min_request_interval: + time.sleep( + self._min_request_interval - (current_time - self._last_request_time) + ) + BraveSearchTool._last_request_time = time.time() try: search_query = kwargs.get("search_query") or kwargs.get("query") if not search_query: diff --git a/tests/tools/brave_search_tool_test.py b/tests/tools/brave_search_tool_test.py index 969bd48fe..36300f723 100644 --- a/tests/tools/brave_search_tool_test.py 
+++ b/tests/tools/brave_search_tool_test.py @@ -40,8 +40,8 @@ def test_brave_tool(): tool = BraveSearchTool( n_results=2, ) - - print(tool.run(search_query="ChatGPT")) + x = tool.run(search_query="ChatGPT") + print(x) if __name__ == "__main__": From 95cc6835a130a35fab52bae5c9e41e7073fc0ef0 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Sat, 30 Nov 2024 22:30:31 -0600 Subject: [PATCH 17/69] update name --- src/crewai_tools/tools/brave_search_tool/brave_search_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 8d6a9a182..dceff1d57 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -38,7 +38,7 @@ class BraveSearchTool(BaseTool): - python-dotenv (for API key management) """ - name: str = "Search the internet" + name: str = "Brave Web Search the internet" description: str = ( "A tool that can be used to search the internet with a search_query." ) From e0d3ee5b23b1999e4a8b01904e61fc19c85a41c0 Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Tue, 3 Dec 2024 20:35:23 +0800 Subject: [PATCH 18/69] docs: add Discourse community link to contact section Add link to Discourse community platform in the contact section to provide users with an additional support channel alongside Discord. --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 491890877..aca20f640 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ In the realm of CrewAI agents, tools are pivotal for enhancing functionality. Th

-[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) +[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) | [Discourse](https://community.crewai.com/)

@@ -140,6 +140,4 @@ Thank you for your interest in enhancing the capabilities of AI agents through a ## Contact -For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb) or open an issue in this repository. - - +For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb), [Discourse](https://community.crewai.com/) or open an issue in this repository. From a64cccbd724a9c24fc825a16de0ec1de2fc39ed9 Mon Sep 17 00:00:00 2001 From: siddas27 Date: Wed, 4 Dec 2024 22:28:30 -0600 Subject: [PATCH 19/69] add BraveSearchTool to init --- src/crewai_tools/__init__.py | 3 ++- src/crewai_tools/tools/__init__.py | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 6bd8dfd71..5f9a81d9c 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -1,4 +1,5 @@ from .tools import ( + BraveSearchTool, BrowserbaseLoadTool, CodeDocsSearchTool, CodeInterpreterTool, @@ -19,6 +20,7 @@ from .tools import ( LlamaIndexTool, MDXSearchTool, MultiOnTool, + MySQLSearchTool, NL2SQLTool, PDFSearchTool, PGSearchTool, @@ -40,6 +42,5 @@ from .tools import ( XMLSearchTool, YoutubeChannelSearchTool, YoutubeVideoSearchTool, - MySQLSearchTool ) from .tools.base_tool import BaseTool, Tool, tool diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 9016c57fd..73a96f4cf 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -1,3 +1,4 @@ +from .brave_search_tool.brave_search_tool import BraveSearchTool from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool @@ -11,10 +12,10 @@ from .exa_tools.exa_search_tool import EXASearchTool from .file_read_tool.file_read_tool import FileReadTool from .file_writer_tool.file_writer_tool import FileWriterTool from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import ( - FirecrawlCrawlWebsiteTool + FirecrawlCrawlWebsiteTool, ) from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( - FirecrawlScrapeWebsiteTool + FirecrawlScrapeWebsiteTool, ) from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool from .github_search_tool.github_search_tool import GithubSearchTool @@ -22,16 +23,17 @@ from .json_search_tool.json_search_tool import JSONSearchTool from .llamaindex_tool.llamaindex_tool import LlamaIndexTool from .mdx_seach_tool.mdx_search_tool import MDXSearchTool from .multion_tool.multion_tool import MultiOnTool +from .mysql_search_tool.mysql_search_tool import MySQLSearchTool from .nl2sql.nl2sql_tool import NL2SQLTool from .pdf_search_tool.pdf_search_tool import PDFSearchTool from .pg_seach_tool.pg_search_tool import PGSearchTool from .rag.rag_tool import RagTool from .scrape_element_from_website.scrape_element_from_website import ( - ScrapeElementFromWebsiteTool + ScrapeElementFromWebsiteTool, ) from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ( - ScrapflyScrapeWebsiteTool + ScrapflyScrapeWebsiteTool, ) from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool from .serper_dev_tool.serper_dev_tool import SerperDevTool @@ -46,7 +48,6 @@ from .vision_tool.vision_tool import VisionTool from 
.website_search.website_search_tool import WebsiteSearchTool from .xml_search_tool.xml_search_tool import XMLSearchTool from .youtube_channel_search_tool.youtube_channel_search_tool import ( - YoutubeChannelSearchTool + YoutubeChannelSearchTool, ) from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool -from .mysql_search_tool.mysql_search_tool import MySQLSearchTool From d5fb31e645ccf0ebd7299442c6b099d6ca116e2c Mon Sep 17 00:00:00 2001 From: Brandon Hancock Date: Thu, 5 Dec 2024 13:16:48 -0500 Subject: [PATCH 20/69] update basetool dependencies to use root crewai repo --- README.md | 4 +- src/crewai_tools/__init__.py | 1 - src/crewai_tools/tools/base_tool.py | 59 ------------------- .../brave_search_tool/brave_search_tool.py | 3 +- .../browserbase_load_tool.py | 3 +- .../code_interpreter_tool.py | 3 +- .../tools/composio_tool/composio_tool.py | 3 +- .../tools/dalle_tool/dalle_tool.py | 3 +- .../directory_read_tool.py | 3 +- .../tools/exa_tools/exa_base_tool.py | 4 +- .../tools/exa_tools/exa_search_tool.py | 40 +++++++------ .../tools/file_read_tool/file_read_tool.py | 3 +- .../file_writer_tool/file_writer_tool.py | 16 ++--- .../firecrawl_crawl_website_tool.py | 12 ++-- .../firecrawl_scrape_website_tool.py | 10 ++-- .../firecrawl_search_tool.py | 3 +- .../jina_scrape_website_tool.py | 26 ++++---- .../tools/llamaindex_tool/llamaindex_tool.py | 3 +- .../tools/multion_tool/multion_tool.py | 2 +- src/crewai_tools/tools/nl2sql/nl2sql_tool.py | 6 +- src/crewai_tools/tools/rag/rag_tool.py | 3 +- .../scrape_element_from_website.py | 3 +- .../scrape_website_tool.py | 7 +-- .../scrapfly_scrape_website_tool.py | 3 +- .../selenium_scraping_tool.py | 4 +- .../tools/serper_dev_tool/serper_dev_tool.py | 3 +- .../serply_news_search_tool.py | 3 +- .../serply_scholar_search_tool.py | 3 +- .../serply_api_tool/serply_web_search_tool.py | 3 +- .../tools/spider_tool/spider_tool.py | 3 +- .../tools/vision_tool/vision_tool.py | 3 +- 31 files changed, 82 insertions(+), 163 deletions(-) delete mode 100644 src/crewai_tools/tools/base_tool.py diff --git a/README.md b/README.md index aca20f640..43cdc9b57 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ There are three ways to create tools for crewAI agents: ### Subclassing `BaseTool` ```python -from crewai_tools import BaseTool +from crewai.tools import BaseTool class MyCustomTool(BaseTool): name: str = "Name of my tool" @@ -70,7 +70,7 @@ Define a new class inheriting from `BaseTool`, specifying `name`, `description`, For a simpler approach, create a `Tool` object directly with the required attributes and a functional logic. 
```python -from crewai_tools import tool +from crewai.tools import BaseTool @tool("Name of my tool") def my_tool(question: str) -> str: """Clear description for what this tool is useful for, you agent will need this information to use it.""" diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 5f9a81d9c..3fad09d9f 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -43,4 +43,3 @@ from .tools import ( YoutubeChannelSearchTool, YoutubeVideoSearchTool, ) -from .tools.base_tool import BaseTool, Tool, tool diff --git a/src/crewai_tools/tools/base_tool.py b/src/crewai_tools/tools/base_tool.py deleted file mode 100644 index 674e33030..000000000 --- a/src/crewai_tools/tools/base_tool.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import Any, Callable - -from pydantic import BaseModel as PydanticBaseModel - -from crewai.tools.base_tool import BaseTool -from crewai.tools.structured_tool import CrewStructuredTool - - -class Tool(BaseTool): - func: Callable - """The function that will be executed when the tool is called.""" - - def _run(self, *args: Any, **kwargs: Any) -> Any: - return self.func(*args, **kwargs) - - -def to_langchain( - tools: list[BaseTool | CrewStructuredTool], -) -> list[CrewStructuredTool]: - return [t.to_structured_tool() if isinstance(t, BaseTool) else t for t in tools] - - -def tool(*args): - """ - Decorator to create a tool from a function. - """ - - def _make_with_name(tool_name: str) -> Callable: - def _make_tool(f: Callable) -> BaseTool: - if f.__doc__ is None: - raise ValueError("Function must have a docstring") - if f.__annotations__ is None: - raise ValueError("Function must have type annotations") - - class_name = "".join(tool_name.split()).title() - args_schema = type( - class_name, - (PydanticBaseModel,), - { - "__annotations__": { - k: v for k, v in f.__annotations__.items() if k != "return" - }, - }, - ) - - return Tool( - name=tool_name, - description=f.__doc__, - func=f, - args_schema=args_schema, - ) - - return _make_tool - - if len(args) == 1 and callable(args[0]): - return _make_with_name(args[0].__name__)(args[0]) - if len(args) == 1 and isinstance(args[0], str): - return _make_with_name(args[0]) - raise ValueError("Invalid arguments") diff --git a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index dceff1d57..11035739d 100644 --- a/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -4,10 +4,9 @@ import time from typing import Any, ClassVar, Optional, Type import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - def _save_results_to_file(content: str) -> None: """Saves the search results to a file.""" diff --git a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py index 514664557..54c33db3c 100644 --- a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py +++ b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class BrowserbaseLoadToolSchema(BaseModel): url: str = Field(description="Website URL") diff --git 
a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index a4488b35f..61c180fe3 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -3,10 +3,9 @@ import os from typing import List, Optional, Type import docker +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class CodeInterpreterSchema(BaseModel): """Input for CodeInterpreterTool.""" diff --git a/src/crewai_tools/tools/composio_tool/composio_tool.py b/src/crewai_tools/tools/composio_tool/composio_tool.py index 62068c0bd..4823441bf 100644 --- a/src/crewai_tools/tools/composio_tool/composio_tool.py +++ b/src/crewai_tools/tools/composio_tool/composio_tool.py @@ -5,8 +5,7 @@ Composio tools wrapper. import typing as t import typing_extensions as te - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool class ComposioTool(BaseTool): diff --git a/src/crewai_tools/tools/dalle_tool/dalle_tool.py b/src/crewai_tools/tools/dalle_tool/dalle_tool.py index da6adb2b1..7040de11a 100644 --- a/src/crewai_tools/tools/dalle_tool/dalle_tool.py +++ b/src/crewai_tools/tools/dalle_tool/dalle_tool.py @@ -1,11 +1,10 @@ import json from typing import Type +from crewai.tools import BaseTool from openai import OpenAI from pydantic import BaseModel -from crewai_tools.tools.base_tool import BaseTool - class ImagePromptSchema(BaseModel): """Input for Dall-E Tool.""" diff --git a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 3d308ba45..6033202be 100644 --- a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -1,10 +1,9 @@ import os from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedDirectoryReadToolSchema(BaseModel): """Input for DirectoryReadTool.""" diff --git a/src/crewai_tools/tools/exa_tools/exa_base_tool.py b/src/crewai_tools/tools/exa_tools/exa_base_tool.py index d2fe6217c..295b283ad 100644 --- a/src/crewai_tools/tools/exa_tools/exa_base_tool.py +++ b/src/crewai_tools/tools/exa_tools/exa_base_tool.py @@ -1,10 +1,8 @@ -import os from typing import Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class EXABaseToolToolSchema(BaseModel): """Input for EXABaseTool.""" diff --git a/src/crewai_tools/tools/exa_tools/exa_search_tool.py b/src/crewai_tools/tools/exa_tools/exa_search_tool.py index 30f77d1ee..6724c2417 100644 --- a/src/crewai_tools/tools/exa_tools/exa_search_tool.py +++ b/src/crewai_tools/tools/exa_tools/exa_search_tool.py @@ -1,28 +1,30 @@ import os -import requests from typing import Any +import requests + from .exa_base_tool import EXABaseTool + class EXASearchTool(EXABaseTool): - def _run( - self, - **kwargs: Any, - ) -> Any: - search_query = kwargs.get('search_query') - if search_query is None: - search_query = kwargs.get('query') + def _run( + self, + **kwargs: Any, + ) -> Any: + search_query = kwargs.get("search_query") + if search_query is None: + search_query = kwargs.get("query") - payload = { - "query": search_query, - "type": "magic", - } + payload = { + "query": search_query, + 
"type": "magic", + } - headers = self.headers.copy() - headers["x-api-key"] = os.environ['EXA_API_KEY'] + headers = self.headers.copy() + headers["x-api-key"] = os.environ["EXA_API_KEY"] - response = requests.post(self.search_url, json=payload, headers=headers) - results = response.json() - if 'results' in results: - results = super()._parse_results(results['results']) - return results + response = requests.post(self.search_url, json=payload, headers=headers) + results = response.json() + if "results" in results: + results = super()._parse_results(results["results"]) + return results diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 265dca54a..fe34c9d8b 100644 --- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedFileReadToolSchema(BaseModel): """Input for FileReadTool.""" diff --git a/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py b/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py index a008e4a75..ed454a1bd 100644 --- a/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py +++ b/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py @@ -1,16 +1,18 @@ import os -from typing import Any, Optional, Type -from pydantic import BaseModel -from ..base_tool import BaseTool from distutils.util import strtobool +from typing import Any, Optional, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel class FileWriterToolInput(BaseModel): - filename: str + filename: str directory: Optional[str] = "./" overwrite: str = "False" content: str - + + class FileWriterTool(BaseTool): name: str = "File Writer Tool" description: str = ( @@ -26,7 +28,7 @@ class FileWriterTool(BaseTool): # Construct the full path filepath = os.path.join(kwargs.get("directory") or "", kwargs["filename"]) - + # Convert overwrite to boolean kwargs["overwrite"] = bool(strtobool(kwargs["overwrite"])) @@ -46,4 +48,4 @@ class FileWriterTool(BaseTool): except KeyError as e: return f"An error occurred while accessing key: {str(e)}" except Exception as e: - return f"An error occurred while writing to the file: {str(e)}" \ No newline at end of file + return f"An error occurred while writing to the file: {str(e)}" diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 672656fc9..c23ff2100 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,8 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field, ConfigDict - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field # Type checking import if TYPE_CHECKING: @@ -21,9 +20,7 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel): class FirecrawlCrawlWebsiteTool(BaseTool): model_config = ConfigDict( - arbitrary_types_allowed=True, - validate_assignment=True, - frozen=False + arbitrary_types_allowed=True, validate_assignment=True, frozen=False ) name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using 
Firecrawl and return the contents" @@ -59,10 +56,11 @@ class FirecrawlCrawlWebsiteTool(BaseTool): try: from firecrawl import FirecrawlApp + # Must rebuild model after class is defined FirecrawlCrawlWebsiteTool.model_rebuild() except ImportError: """ When this tool is not used, then exception can be ignored. """ - pass \ No newline at end of file + pass diff --git a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 84b61209b..9ab7d293e 100644 --- a/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -1,8 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from pydantic import BaseModel, Field, ConfigDict - -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field # Type checking import if TYPE_CHECKING: @@ -25,9 +24,7 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel): class FirecrawlScrapeWebsiteTool(BaseTool): model_config = ConfigDict( - arbitrary_types_allowed=True, - validate_assignment=True, - frozen=False + arbitrary_types_allowed=True, validate_assignment=True, frozen=False ) name: str = "Firecrawl web scrape tool" description: str = "Scrape webpages url using Firecrawl and return the contents" @@ -70,6 +67,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool): try: from firecrawl import FirecrawlApp + # Must rebuild model after class is defined FirecrawlScrapeWebsiteTool.model_rebuild() except ImportError: diff --git a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index 13c3b82ee..5efd274de 100644 --- a/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -1,9 +1,8 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - # Type checking import if TYPE_CHECKING: from firecrawl import FirecrawlApp diff --git a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index d887c085d..a10a4ffdb 100644 --- a/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -1,17 +1,21 @@ +from typing import Optional, Type + import requests -from typing import Type, Optional -from ..base_tool import BaseTool +from crewai.tools import BaseTool from pydantic import BaseModel, Field class JinaScrapeWebsiteToolInput(BaseModel): """Input schema for JinaScrapeWebsiteTool.""" + website_url: str = Field(..., description="Mandatory website url to read the file") class JinaScrapeWebsiteTool(BaseTool): name: str = "JinaScrapeWebsiteTool" - description: str = "A tool that can be used to read a website content using Jina.ai reader and return markdown content." + description: str = ( + "A tool that can be used to read a website content using Jina.ai reader and return markdown content." 
+ ) args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput website_url: Optional[str] = None api_key: Optional[str] = None @@ -22,31 +26,29 @@ class JinaScrapeWebsiteTool(BaseTool): website_url: Optional[str] = None, api_key: Optional[str] = None, custom_headers: Optional[dict] = None, - **kwargs + **kwargs, ): super().__init__(**kwargs) if website_url is not None: self.website_url = website_url - self.description = ( - f"A tool that can be used to read {website_url}'s content and return markdown content." - ) + self.description = f"A tool that can be used to read {website_url}'s content and return markdown content." self._generate_description() if custom_headers is not None: self.headers = custom_headers - + if api_key is not None: self.headers["Authorization"] = f"Bearer {api_key}" def _run(self, website_url: Optional[str] = None) -> str: url = website_url or self.website_url if not url: - raise ValueError("Website URL must be provided either during initialization or execution") + raise ValueError( + "Website URL must be provided either during initialization or execution" + ) response = requests.get( - f"https://r.jina.ai/{url}", - headers=self.headers, - timeout=15 + f"https://r.jina.ai/{url}", headers=self.headers, timeout=15 ) response.raise_for_status() return response.text diff --git a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py index af5c93e1f..61a747956 100644 --- a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py +++ b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py @@ -1,9 +1,8 @@ from typing import Any, Optional, Type, cast +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class LlamaIndexTool(BaseTool): """Tool to wrap LlamaIndex tools/query engines.""" diff --git a/src/crewai_tools/tools/multion_tool/multion_tool.py b/src/crewai_tools/tools/multion_tool/multion_tool.py index 2dc944f23..a991074da 100644 --- a/src/crewai_tools/tools/multion_tool/multion_tool.py +++ b/src/crewai_tools/tools/multion_tool/multion_tool.py @@ -2,7 +2,7 @@ from typing import Any, Optional -from crewai_tools.tools.base_tool import BaseTool +from crewai.tools import BaseTool class MultiOnTool(BaseTool): diff --git a/src/crewai_tools/tools/nl2sql/nl2sql_tool.py b/src/crewai_tools/tools/nl2sql/nl2sql_tool.py index 22c3a299b..786550ee7 100644 --- a/src/crewai_tools/tools/nl2sql/nl2sql_tool.py +++ b/src/crewai_tools/tools/nl2sql/nl2sql_tool.py @@ -1,11 +1,10 @@ -from typing import Any, Union +from typing import Any, Type, Union -from ..base_tool import BaseTool +from crewai.tools import BaseTool from pydantic import BaseModel, Field from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker -from typing import Type, Any class NL2SQLToolInput(BaseModel): sql_query: str = Field( @@ -13,6 +12,7 @@ class NL2SQLToolInput(BaseModel): description="The SQL query to execute.", ) + class NL2SQLTool(BaseTool): name: str = "NL2SQLTool" description: str = "Converts natural language to SQL queries and executes them." 
diff --git a/src/crewai_tools/tools/rag/rag_tool.py b/src/crewai_tools/tools/rag/rag_tool.py index 97291cd81..a9bbdab53 100644 --- a/src/crewai_tools/tools/rag/rag_tool.py +++ b/src/crewai_tools/tools/rag/rag_tool.py @@ -1,10 +1,9 @@ from abc import ABC, abstractmethod from typing import Any +from crewai.tools import BaseTool from pydantic import BaseModel, Field, model_validator -from crewai_tools.tools.base_tool import BaseTool - class Adapter(BaseModel, ABC): class Config: diff --git a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index 56bb27195..14757d247 100644 --- a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type import requests from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedScrapeElementFromWebsiteToolSchema(BaseModel): """Input for ScrapeElementFromWebsiteTool.""" diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 99df1d2dd..8cfc5d136 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -4,10 +4,9 @@ from typing import Any, Optional, Type import requests from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from ..base_tool import BaseTool - class FixedScrapeWebsiteToolSchema(BaseModel): """Input for ScrapeWebsiteTool.""" @@ -69,6 +68,6 @@ class ScrapeWebsiteTool(BaseTool): parsed = BeautifulSoup(page.text, "html.parser") text = parsed.get_text(" ") - text = re.sub('[ \t]+', ' ', text) - text = re.sub('\\s+\n\\s+', '\n', text) + text = re.sub("[ \t]+", " ", text) + text = re.sub("\\s+\n\\s+", "\n", text) return text diff --git a/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index 5800e223c..b47ce8e5b 100644 --- a/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -1,10 +1,9 @@ import logging from typing import Any, Dict, Literal, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - logger = logging.getLogger(__file__) diff --git a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py index 970cde7ca..47910f35b 100644 --- a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py +++ b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py @@ -1,14 +1,12 @@ import time from typing import Any, Optional, Type -from bs4 import BeautifulSoup +from crewai.tools import BaseTool from pydantic import BaseModel, Field from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By -from ..base_tool import BaseTool - class FixedSeleniumScrapingToolSchema(BaseModel): """Input for SeleniumScrapingTool.""" diff --git 
a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py index ca118326e..8f53ce0a4 100644 --- a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py +++ b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -4,10 +4,9 @@ import os from typing import Any, Optional, Type import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - def _save_results_to_file(content: str) -> None: """Saves the search results to a file.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py index 21e6e9872..c058091a2 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SerplyNewsSearchToolSchema(BaseModel): """Input for Serply News Search.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py index 1ac6337f6..3ed9de4ab 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SerplyScholarSearchToolSchema(BaseModel): """Input for Serply Scholar Search.""" diff --git a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py index b65fa21d1..b4d1ae4b5 100644 --- a/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py +++ b/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py @@ -3,10 +3,9 @@ from typing import Any, Optional, Type from urllib.parse import urlencode import requests +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SerplyWebSearchToolSchema(BaseModel): """Input for Serply Web Search.""" diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py b/src/crewai_tools/tools/spider_tool/spider_tool.py index c01b5e2a3..94da9f6fe 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -1,9 +1,8 @@ from typing import Any, Dict, Literal, Optional, Type +from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool - class SpiderToolSchema(BaseModel): url: str = Field(description="Website URL") diff --git a/src/crewai_tools/tools/vision_tool/vision_tool.py b/src/crewai_tools/tools/vision_tool/vision_tool.py index 6b7a21dbd..3ac3c3ae5 100644 --- a/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -2,11 +2,10 @@ import base64 from typing import Type import requests +from crewai.tools import BaseTool from openai import OpenAI from pydantic import BaseModel -from crewai_tools.tools.base_tool import BaseTool - class 
ImagePromptSchema(BaseModel): """Input for Vision Tool.""" From a0e0c2815273efe75760b4b83519f2d44500f916 Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Sun, 8 Dec 2024 21:44:19 -0800 Subject: [PATCH 21/69] setup weaviate vector search tool --- src/crewai_tools/__init__.py | 1 + src/crewai_tools/tools/__init__.py | 1 + .../tools/weaviate_tool/README.md | 80 +++++++++++++++++ .../tools/weaviate_tool/vector_search.py | 89 +++++++++++++++++++ 4 files changed, 171 insertions(+) create mode 100644 src/crewai_tools/tools/weaviate_tool/README.md create mode 100644 src/crewai_tools/tools/weaviate_tool/vector_search.py diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 3fad09d9f..12523a214 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -42,4 +42,5 @@ from .tools import ( XMLSearchTool, YoutubeChannelSearchTool, YoutubeVideoSearchTool, + WeaviateVectorSearchTool, ) diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 73a96f4cf..23565dbea 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -51,3 +51,4 @@ from .youtube_channel_search_tool.youtube_channel_search_tool import ( YoutubeChannelSearchTool, ) from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool +from .weaviate_tool.vector_search import WeaviateVectorSearchTool diff --git a/src/crewai_tools/tools/weaviate_tool/README.md b/src/crewai_tools/tools/weaviate_tool/README.md new file mode 100644 index 000000000..42daa40e0 --- /dev/null +++ b/src/crewai_tools/tools/weaviate_tool/README.md @@ -0,0 +1,80 @@ +# WeaviateVectorSearchTool + +## Description +This tool is specifically crafted for conducting semantic searches within docs within a Weaviate vector database. Use this tool to find semantically similar docs to a given query. + +Weaviate is a vector database that is used to store and query vector embeddings. You can follow their docs here: https://weaviate.io/developers/wcs/connect + +## Installation +Install the crewai_tools package by executing the following command in your terminal: + +```shell +uv pip install 'crewai[tools]' +``` + +## Example +To utilize the WeaviateVectorSearchTool for different use cases, follow these examples: + +```python +from crewai_tools import WeaviateVectorSearchTool + +# To enable the tool to search any website the agent comes across or learns about during its operation +tool = WeaviateVectorSearchTool( + collection_name='example_collections', + limit=3, + weaviate_cluster_url="https://your-weaviate-cluster-url.com", + weaviate_api_key="your-weaviate-api-key", +) + +# or + +# Setup custom model for vectorizer and generative model +tool = WeaviateVectorSearchTool( + collection_name='example_collections', + limit=3, + vectorizer=Configure.Vectorizer.text2vec_openai(model="nomic-embed-text"), + generative_model=Configure.Generative.openai(model="gpt-4o-mini"), + weaviate_cluster_url="https://your-weaviate-cluster-url.com", + weaviate_api_key="your-weaviate-api-key", +) + +# Adding the tool to an agent +rag_agent = Agent( + name="rag_agent", + role="You are a helpful assistant that can answer questions with the help of the WeaviateVectorSearchTool.", + llm="gpt-4o-mini", + tools=[tool], +) +``` + +## Arguments +- `collection_name` : The name of the collection to search within. (Required) +- `weaviate_cluster_url` : The URL of the Weaviate cluster. (Required) +- `weaviate_api_key` : The API key for the Weaviate cluster. 
(Required) +- `limit` : The number of results to return. (Optional) +- `vectorizer` : The vectorizer to use. (Optional) +- `generative_model` : The generative model to use. (Optional) + +Preloading the Weaviate database with documents: + +```python +from crewai_tools import WeaviateVectorSearchTool + +# Use before hooks to generate the documents and add them to the Weaviate database. Follow the weaviate docs: https://weaviate.io/developers/wcs/connect +test_docs = client.collections.get("test_collection_name") + + +docs_to_load = os.listdir("knowledge") +with test_docs.batch.dynamic() as batch: + for d in docs_to_load: + with open(os.path.join("knowledge", d), "r") as f: + content = f.read() + batch.add_object( + { + "content": content, + "year": d.split("_")[0], + } + ) +tool = WeaviateVectorSearchTool(collection_name='example_collections', limit=3) + +``` diff --git a/src/crewai_tools/tools/weaviate_tool/vector_search.py b/src/crewai_tools/tools/weaviate_tool/vector_search.py new file mode 100644 index 000000000..ab80b6ce1 --- /dev/null +++ b/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -0,0 +1,89 @@ +import os +import json +import weaviate +from pydantic import BaseModel, Field +from typing import Type, Optional +from crewai.tools import BaseTool + +from weaviate.classes.config import Configure, Vectorizers +from weaviate.classes.init import Auth + + +class WeaviateToolSchema(BaseModel): + """Input for WeaviateTool.""" + + query: str = Field( + ..., + description="The query to search retrieve relevant information from the Weaviate database. Pass only the query, not the question.", + ) + + +class WeaviateVectorSearchTool(BaseTool): + """Tool to search the Weaviate database""" + + name: str = "WeaviateVectorSearchTool" + description: str = "A tool to search the Weaviate database for relevant information on internal documents." + args_schema: Type[BaseModel] = WeaviateToolSchema + query: Optional[str] = None + + vectorizer: Optional[Vectorizers] = Field( + default=Configure.Vectorizer.text2vec_openai( + model="nomic-embed-text", + ) + ) + generative_model: Optional[str] = Field( + default=Configure.Generative.openai( + model="gpt-4o", + ), + ) + collection_name: Optional[str] = None + limit: Optional[int] = Field(default=3) + headers: Optional[dict] = Field( + default={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]} + ) + weaviate_cluster_url: str = Field( + ..., + description="The URL of the Weaviate cluster", + ) + weaviate_api_key: str = Field( + ..., + description="The API key for the Weaviate cluster", + ) + + def _run(self, query: str) -> str: + """Search the Weaviate database + + Args: + query (str): The query to search retrieve relevant information from the Weaviate database. Pass only the query as a string, not the question. 
+ + Returns: + str: The result of the search query + """ + + if not self.weaviate_cluster_url or not self.weaviate_api_key: + raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set") + + client = weaviate.connect_to_weaviate_cloud( + cluster_url=self.weaviate_cluster_url, + auth_credentials=Auth.api_key(self.weaviate_api_key), + headers=self.headers, + ) + internal_docs = client.collections.get(self.collection_name) + + if not internal_docs: + internal_docs = client.collections.create( + name=self.collection_name, + vectorizer_config=self.vectorizer, + generative_config=self.generative_model, + ) + + response = internal_docs.query.near_text( + query=query, + limit=self.limit, + ) + json_response = "" + for obj in response.objects: + json_response += json.dumps(obj.properties, indent=2) + + client.close() + return json_response From d5d83cbd7eea3cebac36fbdaa027229ec875e2bb Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Sun, 8 Dec 2024 21:48:15 -0800 Subject: [PATCH 22/69] fix collection name docs --- src/crewai_tools/tools/weaviate_tool/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/weaviate_tool/README.md b/src/crewai_tools/tools/weaviate_tool/README.md index 42daa40e0..c48f2f70a 100644 --- a/src/crewai_tools/tools/weaviate_tool/README.md +++ b/src/crewai_tools/tools/weaviate_tool/README.md @@ -61,7 +61,7 @@ Preloading the Weaviate database with documents: from crewai_tools import WeaviateVectorSearchTool # Use before hooks to generate the documents and add them to the Weaviate database. Follow the weaviate docs: https://weaviate.io/developers/wcs/connect -test_docs = client.collections.get("test_collection_name") +test_docs = client.collections.get("example_collections") docs_to_load = os.listdir("knowledge") From 1eb5d50a5572e82837387b0d3f7cfdbb10c6c421 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 12 Dec 2024 16:00:24 -0500 Subject: [PATCH 23/69] Fix url and api_key args on crawler tool --- .../firecrawl_crawl_website_tool.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index c23ff2100..d753cdd6f 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Type - from crewai.tools import BaseTool from pydantic import BaseModel, ConfigDict, Field +import os # Type checking import if TYPE_CHECKING: @@ -27,6 +27,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool): args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema api_key: Optional[str] = None firecrawl: Optional["FirecrawlApp"] = None + url: Optional[str] = None def __init__(self, api_key: Optional[str] = None, **kwargs): super().__init__(**kwargs) @@ -37,7 +38,11 @@ class FirecrawlCrawlWebsiteTool(BaseTool): "`firecrawl` package not found, please run `pip install firecrawl-py`" ) - self.firecrawl = FirecrawlApp(api_key=api_key) + client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY") + if not client_api_key: + raise ValueError("FIRECRAWL_API_KEY is not set") + + self.firecrawl = FirecrawlApp(api_key=client_api_key) def _run( self, @@ -45,13 +50,17 @@ class FirecrawlCrawlWebsiteTool(BaseTool): crawler_options: Optional[Dict[str, Any]] = None, page_options: 
Optional[Dict[str, Any]] = None, ): + # Unless url has been previously set via constructor by the user, + # use the url argument provided by the agent + base_url = self.url or url + if crawler_options is None: crawler_options = {} if page_options is None: page_options = {} options = {"crawlerOptions": crawler_options, "pageOptions": page_options} - return self.firecrawl.crawl_url(url, options) + return self.firecrawl.crawl_url(base_url, options) try: From b0a948797aa973e165853d98fbef3788ab850b35 Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Fri, 13 Dec 2024 21:42:01 +0800 Subject: [PATCH 24/69] feat(serper-dev): implement enhanced search capabilities and error handling - Add support for multiple search types (general and news) - Implement knowledge graph integration - Add structured result processing for organic results, "People Also Ask", and related searches - Enhance error handling with try-catch blocks and logging - Update documentation with comprehensive feature list and usage examples --- .../tools/serper_dev_tool/README.md | 49 ++-- .../tools/serper_dev_tool/serper_dev_tool.py | 248 ++++++++++++++---- 2 files changed, 229 insertions(+), 68 deletions(-) diff --git a/src/crewai_tools/tools/serper_dev_tool/README.md b/src/crewai_tools/tools/serper_dev_tool/README.md index ae900a3bc..0beb9f2ab 100644 --- a/src/crewai_tools/tools/serper_dev_tool/README.md +++ b/src/crewai_tools/tools/serper_dev_tool/README.md @@ -1,30 +1,49 @@ # SerperDevTool Documentation ## Description -This tool is designed to perform a semantic search for a specified query from a text's content across the internet. It utilizes the `serper.dev` API to fetch and display the most relevant search results based on the query provided by the user. +The SerperDevTool is a powerful search tool that interfaces with the `serper.dev` API to perform internet searches. It supports multiple search types including general search and news search, with features like knowledge graph integration, organic results, "People Also Ask" questions, and related searches. + +## Features +- Multiple search types: 'search' (default) and 'news' +- Knowledge graph integration for enhanced search context +- Organic search results with sitelinks +- "People Also Ask" questions and answers +- Related searches suggestions +- News search with date, source, and image information +- Configurable number of results +- Optional result saving to file ## Installation -To incorporate this tool into your project, follow the installation instructions below: ```shell pip install 'crewai[tools]' ``` -## Example -The following example demonstrates how to initialize the tool and execute a search with a given query: - +## Usage ```python from crewai_tools import SerperDevTool -# Initialize the tool for internet searching capabilities -tool = SerperDevTool() +# Initialize the tool +tool = SerperDevTool( + n_results=10, # Optional: Number of results to return (default: 10) + save_file=False, # Optional: Save results to file (default: False) + search_type="search" # Optional: Type of search - "search" or "news" (default: "search") +) + +# Execute a search +results = tool._run(search_query="your search query") ``` -## Steps to Get Started -To effectively use the `SerperDevTool`, follow these steps: +## Configuration +1. **API Key Setup**: + - Sign up for an account at `serper.dev` + - Obtain your API key + - Set the environment variable: `SERPER_API_KEY` -1. 
**Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment. -2. **API Key Acquisition**: Acquire a `serper.dev` API key by registering for a free account at `serper.dev`. -3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPER_API_KEY` to facilitate its use by the tool. - -## Conclusion -By integrating the `SerperDevTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward. +## Response Format +The tool returns structured data including: +- Search parameters +- Knowledge graph data (for general search) +- Organic search results +- "People Also Ask" questions +- Related searches +- News results (for news search type) diff --git a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py index ca118326e..bbea77909 100644 --- a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py +++ b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -1,20 +1,30 @@ import datetime import json import os -from typing import Any, Optional, Type +import logging +from typing import Any, Type import requests from pydantic import BaseModel, Field from crewai_tools.tools.base_tool import BaseTool +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + def _save_results_to_file(content: str) -> None: """Saves the search results to a file.""" - filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt" - with open(filename, "w") as file: - file.write(content) - print(f"Results saved to {filename}") + try: + filename = f"search_results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt" + with open(filename, "w") as file: + file.write(content) + logger.info(f"Results saved to {filename}") + except IOError as e: + logger.error(f"Failed to save results to file: {e}") + raise class SerperDevToolSchema(BaseModel): @@ -28,67 +38,199 @@ class SerperDevToolSchema(BaseModel): class SerperDevTool(BaseTool): name: str = "Search the internet" description: str = ( - "A tool that can be used to search the internet with a search_query." + "A tool that can be used to search the internet with a search_query. " + "Supports different search types: 'search' (default), 'news'" ) args_schema: Type[BaseModel] = SerperDevToolSchema - search_url: str = "https://google.serper.dev/search" - country: Optional[str] = "" - location: Optional[str] = "" - locale: Optional[str] = "" + base_url: str = "https://google.serper.dev" n_results: int = 10 save_file: bool = False + search_type: str = "search" - def _run( - self, - **kwargs: Any, - ) -> Any: + def _get_search_url(self, search_type: str) -> str: + """Get the appropriate endpoint URL based on search type.""" + search_type = search_type.lower() + allowed_search_types = ["search", "news"] + if search_type not in allowed_search_types: + raise ValueError( + f"Invalid search type: {search_type}. 
Must be one of: {', '.join(allowed_search_types)}" + ) + return f"{self.base_url}/{search_type}" - search_query = kwargs.get("search_query") or kwargs.get("query") - save_file = kwargs.get("save_file", self.save_file) - n_results = kwargs.get("n_results", self.n_results) + def _process_knowledge_graph(self, kg: dict) -> dict: + """Process knowledge graph data from search results.""" + return { + "title": kg.get("title", ""), + "type": kg.get("type", ""), + "website": kg.get("website", ""), + "imageUrl": kg.get("imageUrl", ""), + "description": kg.get("description", ""), + "descriptionSource": kg.get("descriptionSource", ""), + "descriptionLink": kg.get("descriptionLink", ""), + "attributes": kg.get("attributes", {}), + } - payload = {"q": search_query, "num": n_results} + def _process_organic_results(self, organic_results: list) -> list: + """Process organic search results.""" + processed_results = [] + for result in organic_results[: self.n_results]: + try: + result_data = { + "title": result["title"], + "link": result["link"], + "snippet": result.get("snippet", ""), + "position": result.get("position"), + } - if self.country != "": - payload["gl"] = self.country - if self.location != "": - payload["location"] = self.location - if self.locale != "": - payload["hl"] = self.locale + if "sitelinks" in result: + result_data["sitelinks"] = [ + { + "title": sitelink.get("title", ""), + "link": sitelink.get("link", ""), + } + for sitelink in result["sitelinks"] + ] - payload = json.dumps(payload) + processed_results.append(result_data) + except KeyError: + logger.warning(f"Skipping malformed organic result: {result}") + continue + return processed_results + def _process_people_also_ask(self, paa_results: list) -> list: + """Process 'People Also Ask' results.""" + processed_results = [] + for result in paa_results[: self.n_results]: + try: + result_data = { + "question": result["question"], + "snippet": result.get("snippet", ""), + "title": result.get("title", ""), + "link": result.get("link", ""), + } + processed_results.append(result_data) + except KeyError: + logger.warning(f"Skipping malformed PAA result: {result}") + continue + return processed_results + + def _process_related_searches(self, related_results: list) -> list: + """Process related search results.""" + processed_results = [] + for result in related_results[: self.n_results]: + try: + processed_results.append({"query": result["query"]}) + except KeyError: + logger.warning(f"Skipping malformed related search result: {result}") + continue + return processed_results + + def _process_news_results(self, news_results: list) -> list: + """Process news search results.""" + processed_results = [] + for result in news_results[: self.n_results]: + try: + result_data = { + "title": result["title"], + "link": result["link"], + "snippet": result.get("snippet", ""), + "date": result.get("date", ""), + "source": result.get("source", ""), + "imageUrl": result.get("imageUrl", ""), + } + processed_results.append(result_data) + except KeyError: + logger.warning(f"Skipping malformed news result: {result}") + continue + return processed_results + + def _make_api_request(self, search_query: str, search_type: str) -> dict: + """Make API request to Serper.""" + search_url = self._get_search_url(search_type) + payload = json.dumps({"q": search_query, "num": self.n_results}) headers = { "X-API-KEY": os.environ["SERPER_API_KEY"], "content-type": "application/json", } - response = requests.request( - "POST", self.search_url, headers=headers, data=payload - ) 
- results = response.json() - - if "organic" in results: - results = results["organic"][: self.n_results] - string = [] - for result in results: - try: - string.append( - "\n".join( - [ - f"Title: {result['title']}", - f"Link: {result['link']}", - f"Snippet: {result['snippet']}", - "---", - ] - ) - ) - except KeyError: - continue - - content = "\n".join(string) - if save_file: - _save_results_to_file(content) - return f"\nSearch results: {content}\n" - else: + response = None + try: + response = requests.post( + search_url, headers=headers, json=json.loads(payload), timeout=10 + ) + response.raise_for_status() + results = response.json() + if not results: + logger.error("Empty response from Serper API") + raise ValueError("Empty response from Serper API") return results + except requests.exceptions.RequestException as e: + error_msg = f"Error making request to Serper API: {e}" + if response is not None and hasattr(response, "content"): + error_msg += f"\nResponse content: {response.content}" + logger.error(error_msg) + raise + except json.JSONDecodeError as e: + if response is not None and hasattr(response, "content"): + logger.error(f"Error decoding JSON response: {e}") + logger.error(f"Response content: {response.content}") + else: + logger.error( + f"Error decoding JSON response: {e} (No response content available)" + ) + raise + + def _process_search_results(self, results: dict, search_type: str) -> dict: + """Process search results based on search type.""" + formatted_results = {} + + if search_type == "search": + if "knowledgeGraph" in results: + formatted_results["knowledgeGraph"] = self._process_knowledge_graph( + results["knowledgeGraph"] + ) + + if "organic" in results: + formatted_results["organic"] = self._process_organic_results( + results["organic"] + ) + + if "peopleAlsoAsk" in results: + formatted_results["peopleAlsoAsk"] = self._process_people_also_ask( + results["peopleAlsoAsk"] + ) + + if "relatedSearches" in results: + formatted_results["relatedSearches"] = self._process_related_searches( + results["relatedSearches"] + ) + + elif search_type == "news": + if "news" in results: + formatted_results["news"] = self._process_news_results(results["news"]) + + return formatted_results + + def _run(self, **kwargs: Any) -> Any: + """Execute the search operation.""" + search_query = kwargs.get("search_query") or kwargs.get("query") + search_type = kwargs.get("search_type", self.search_type) + save_file = kwargs.get("save_file", self.save_file) + + results = self._make_api_request(search_query, search_type) + + formatted_results = { + "searchParameters": { + "q": search_query, + "type": search_type, + **results.get("searchParameters", {}), + } + } + + formatted_results.update(self._process_search_results(results, search_type)) + formatted_results["credits"] = results.get("credits", 1) + + if save_file: + _save_results_to_file(json.dumps(formatted_results, indent=2)) + + return formatted_results From 1fd5805bef7341923da184debdb9515073eb9385 Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Fri, 13 Dec 2024 21:59:38 +0800 Subject: [PATCH 25/69] Resolved conflict --- src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py index bbea77909..fde30735f 100644 --- a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py +++ 
b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -7,7 +7,7 @@ from typing import Any, Type import requests from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool import BaseTool +from crewai_tools import BaseTool logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" From 00418d98f7df82380f519ac76ee870876eb75266 Mon Sep 17 00:00:00 2001 From: theCyberTech <84775494+theCyberTech@users.noreply.github.com> Date: Fri, 13 Dec 2024 22:01:04 +0800 Subject: [PATCH 26/69] resolved conflict --- src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py index fde30735f..b23884180 100644 --- a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py +++ b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -7,7 +7,7 @@ from typing import Any, Type import requests from pydantic import BaseModel, Field -from crewai_tools import BaseTool +from crewai.tools import BaseTool logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" From 2cb33b18e5bab2a6948ad0cb8bba81c27f27bed1 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:18:59 -0500 Subject: [PATCH 27/69] Remove outdated params --- .../firecrawl_crawl_website_tool.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index d753cdd6f..f75685a49 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -10,13 +10,6 @@ if TYPE_CHECKING: class FirecrawlCrawlWebsiteToolSchema(BaseModel): url: str = Field(description="Website URL") - crawler_options: Optional[Dict[str, Any]] = Field( - default=None, description="Options for crawling" - ) - page_options: Optional[Dict[str, Any]] = Field( - default=None, description="Options for page" - ) - class FirecrawlCrawlWebsiteTool(BaseTool): model_config = ConfigDict( From 3a095183c56aff4a7f8e7d9a1324f36f4fa52590 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:20:08 -0500 Subject: [PATCH 28/69] Use proper options and accept custom FirecrawlApp --- .../firecrawl_crawl_website_tool.py | 42 ++++++++++--------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index f75685a49..07fef7730 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -18,9 +18,12 @@ class FirecrawlCrawlWebsiteTool(BaseTool): name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using Firecrawl and return the contents" args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema + firecrawl_app: Optional["FirecrawlApp"] = None api_key: Optional[str] = None - firecrawl: Optional["FirecrawlApp"] = None url: Optional[str] = None + params: Optional[Dict[str, Any]] = None + poll_interval: Optional[int] = 2 + idempotency_key: Optional[str] = None def __init__(self, 
api_key: Optional[str] = None, **kwargs): super().__init__(**kwargs) @@ -31,29 +34,28 @@ class FirecrawlCrawlWebsiteTool(BaseTool): "`firecrawl` package not found, please run `pip install firecrawl-py`" ) - client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY") - if not client_api_key: - raise ValueError("FIRECRAWL_API_KEY is not set") + # Allows passing a previously created FirecrawlApp instance + # or builds a new one with the provided API key + if not self.firecrawl_app: + client_api_key = api_key or os.getenv("FIRECRAWL_API_KEY") + if not client_api_key: + raise ValueError( + "FIRECRAWL_API_KEY is not set. Please provide it either via the constructor " + "with the `api_key` argument or by setting the FIRECRAWL_API_KEY environment variable." + ) + self.firecrawl_app = FirecrawlApp(api_key=client_api_key) - self.firecrawl = FirecrawlApp(api_key=client_api_key) - - def _run( - self, - url: str, - crawler_options: Optional[Dict[str, Any]] = None, - page_options: Optional[Dict[str, Any]] = None, - ): + def _run(self, url: str): # Unless url has been previously set via constructor by the user, - # use the url argument provided by the agent + # use the url argument provided by the agent at runtime. base_url = self.url or url - if crawler_options is None: - crawler_options = {} - if page_options is None: - page_options = {} - - options = {"crawlerOptions": crawler_options, "pageOptions": page_options} - return self.firecrawl.crawl_url(base_url, options) + return self.firecrawl_app.crawl_url( + base_url, + params=self.params, + poll_interval=self.poll_interval, + idempotency_key=self.idempotency_key + ) try: From 164442223e153bffefcc794f22c71ece86eb095a Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:23:53 -0500 Subject: [PATCH 29/69] Organize imports --- .../firecrawl_crawl_website_tool.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 07fef7730..1de7602ec 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,7 +1,9 @@ -from typing import TYPE_CHECKING, Any, Dict, Optional, Type -from crewai.tools import BaseTool -from pydantic import BaseModel, ConfigDict, Field import os +from typing import TYPE_CHECKING, Any, Dict, Optional, Type + +from pydantic import BaseModel, ConfigDict, Field + +from crewai.tools import BaseTool # Type checking import if TYPE_CHECKING: From 668e87d5e13ea45f8b388dcf2a9a8187048e381c Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 13 Dec 2024 11:26:46 -0500 Subject: [PATCH 30/69] Add constructor comments --- .../firecrawl_crawl_website_tool.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 1de7602ec..edada38dd 100644 --- a/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -28,6 +28,17 @@ class FirecrawlCrawlWebsiteTool(BaseTool): idempotency_key: Optional[str] = None def __init__(self, api_key: Optional[str] = None, **kwargs): + """Initialize FirecrawlCrawlWebsiteTool. 
+ + Args: + api_key (Optional[str]): Firecrawl API key. If not provided, will check FIRECRAWL_API_KEY env var. + url (Optional[str]): Base URL to crawl. Can be overridden by the _run method. + firecrawl_app (Optional[FirecrawlApp]): Previously created FirecrawlApp instance. + params (Optional[Dict[str, Any]]): Additional parameters to pass to the FirecrawlApp. + poll_interval (Optional[int]): Poll interval for the FirecrawlApp. + idempotency_key (Optional[str]): Idempotency key for the FirecrawlApp. + **kwargs: Additional arguments passed to BaseTool. + """ super().__init__(**kwargs) try: from firecrawl import FirecrawlApp # type: ignore From c26e962d174164a2fdd751376fa24ceed33c2ec6 Mon Sep 17 00:00:00 2001 From: Hammam Abdelwahab Date: Sun, 15 Dec 2024 10:34:07 +0100 Subject: [PATCH 31/69] Enabled manual setting of docker base url for code interpreter tool. Goal is to avoid the error: CodeInterpreterTool Error while fetching server API version: --- .../tools/code_interpreter_tool/README.md | 13 +++++++++++++ .../code_interpreter_tool/code_interpreter_tool.py | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/code_interpreter_tool/README.md b/src/crewai_tools/tools/code_interpreter_tool/README.md index bc73df7a4..ab0cbf44b 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/README.md +++ b/src/crewai_tools/tools/code_interpreter_tool/README.md @@ -38,3 +38,16 @@ Agent( tools=[CodeInterpreterTool(user_dockerfile_path="")], ) ``` + +If it is difficult to connect to docker daemon automatically (especially for macOS users), you can do this to setup docker host manually + +```python +from crewai_tools import CodeInterpreterTool + +Agent( + ... + tools=[CodeInterpreterTool(user_docker_base_url="", + user_dockerfile_path="")], +) + +``` diff --git a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index 61c180fe3..2f385c809 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -28,6 +28,7 @@ class CodeInterpreterTool(BaseTool): default_image_tag: str = "code-interpreter:latest" code: Optional[str] = None user_dockerfile_path: Optional[str] = None + user_docker_base_url: Optional[str] = None unsafe_mode: bool = False @staticmethod @@ -39,7 +40,7 @@ class CodeInterpreterTool(BaseTool): """ Verify if the Docker image is available. Optionally use a user-provided Dockerfile. """ - client = docker.from_env() + client = docker.from_env() if self.user_docker_base_url != None else docker.DockerClient(base_url=self.user_docker_base_url) try: client.images.get(self.default_image_tag) From b6bb5dbd535166ee4dbfac622afb66094faf7902 Mon Sep 17 00:00:00 2001 From: Hammam Abdelwahab Date: Sun, 15 Dec 2024 10:38:56 +0100 Subject: [PATCH 32/69] Enabled manual setting of docker base url for code interpreter tool. 
Goal is to avoid the error: CodeInterpreterTool Error while fetching server API version: --- .../tools/code_interpreter_tool/code_interpreter_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index 2f385c809..34648eb37 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -40,7 +40,7 @@ class CodeInterpreterTool(BaseTool): """ Verify if the Docker image is available. Optionally use a user-provided Dockerfile. """ - client = docker.from_env() if self.user_docker_base_url != None else docker.DockerClient(base_url=self.user_docker_base_url) + client = docker.from_env() if self.user_docker_base_url == None else docker.DockerClient(base_url=self.user_docker_base_url) try: client.images.get(self.default_image_tag) From 56a9060840f4f33b63a0dde5ebce3c985de61e60 Mon Sep 17 00:00:00 2001 From: Terry Tan Yongsheng Date: Tue, 17 Dec 2024 10:35:33 +0800 Subject: [PATCH 33/69] Add SerpApi tools - google search, google shopping --- src/crewai_tools/__init__.py | 2 + src/crewai_tools/tools/__init__.py | 2 + .../tools/serpapi_tool/serpapi_base_tool.py | 37 ++++++++++++++++ .../serpapi_google_search_tool.py | 40 ++++++++++++++++++ .../serpapi_google_shopping_tool.py | 42 +++++++++++++++++++ 5 files changed, 123 insertions(+) create mode 100644 src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py create mode 100644 src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py create mode 100644 src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 12523a214..87aca8531 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -43,4 +43,6 @@ from .tools import ( YoutubeChannelSearchTool, YoutubeVideoSearchTool, WeaviateVectorSearchTool, + SerpApiGoogleSearchTool, + SerpApiGoogleShoppingTool, ) diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 23565dbea..f6c31f45f 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -52,3 +52,5 @@ from .youtube_channel_search_tool.youtube_channel_search_tool import ( ) from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool from .weaviate_tool.vector_search import WeaviateVectorSearchTool +from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool +from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool \ No newline at end of file diff --git a/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py b/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py new file mode 100644 index 000000000..57e33e71e --- /dev/null +++ b/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py @@ -0,0 +1,37 @@ +import os +import re +from typing import Optional, Any + +from crewai.tools import BaseTool + +class SerpApiBaseTool(BaseTool): + client: Optional[Any] = None + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + try: + from serpapi import Client + except ImportError: + raise ImportError( + "`serpapi` package not found" + ) + api_key = os.getenv("SERPAPI_API_KEY") + if not api_key: + raise ValueError( + "Missing API key, you can get the key from https://serpapi.com/manage-api-key" + ) + self.client = Client(api_key=api_key) + + 
def _omit_fields(self, data, omit_patterns): + if isinstance(data, dict): + for field in list(data.keys()): + if any(re.compile(p).match(field) for p in omit_patterns): + data.pop(field, None) + else: + if isinstance(data[field], (dict, list)): + self._omit_fields(data[field], omit_patterns) + elif isinstance(data, list): + for item in data: + self._omit_fields(item, omit_patterns) + diff --git a/src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py b/src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py new file mode 100644 index 000000000..199b7f5a2 --- /dev/null +++ b/src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py @@ -0,0 +1,40 @@ +from typing import Any, Type, Optional + +import re +from pydantic import BaseModel, Field +from .serpapi_base_tool import SerpApiBaseTool +from serpapi import HTTPError + +class SerpApiGoogleSearchToolSchema(BaseModel): + """Input for Google Search.""" + search_query: str = Field(..., description="Mandatory search query you want to use to Google search.") + location: Optional[str] = Field(None, description="Location you want the search to be performed in.") + +class SerpApiGoogleSearchTool(SerpApiBaseTool): + name: str = "Google Search" + description: str = ( + "A tool to perform to perform a Google search with a search_query." + ) + args_schema: Type[BaseModel] = SerpApiGoogleSearchToolSchema + + def _run( + self, + **kwargs: Any, + ) -> Any: + try: + results = self.client.search({ + "q": kwargs.get("search_query"), + "location": kwargs.get("location"), + }).as_dict() + + self._omit_fields( + results, + [r"search_metadata", r"search_parameters", r"serpapi_.+", r".+_token", r"displayed_link", r"pagination"] + ) + + return results + except HTTPError as e: + return f"An error occurred: {str(e)}. Some parameters may be invalid." + + + \ No newline at end of file diff --git a/src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py b/src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py new file mode 100644 index 000000000..b44b3a809 --- /dev/null +++ b/src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py @@ -0,0 +1,42 @@ +from typing import Any, Type, Optional + +import re +from pydantic import BaseModel, Field +from .serpapi_base_tool import SerpApiBaseTool +from serpapi import HTTPError + +class SerpApiGoogleShoppingToolSchema(BaseModel): + """Input for Google Shopping.""" + search_query: str = Field(..., description="Mandatory search query you want to use to Google shopping.") + location: Optional[str] = Field(None, description="Location you want the search to be performed in.") + + +class SerpApiGoogleShoppingTool(SerpApiBaseTool): + name: str = "Google Shopping" + description: str = ( + "A tool to perform search on Google shopping with a search_query." + ) + args_schema: Type[BaseModel] = SerpApiGoogleShoppingToolSchema + + def _run( + self, + **kwargs: Any, + ) -> Any: + try: + results = self.client.search({ + "engine": "google_shopping", + "q": kwargs.get("search_query"), + "location": kwargs.get("location") + }).as_dict() + + self._omit_fields( + results, + [r"search_metadata", r"search_parameters", r"serpapi_.+", r"filters", r"pagination"] + ) + + return results + except HTTPError as e: + return f"An error occurred: {str(e)}. Some parameters may be invalid." 
+ + + \ No newline at end of file From 2effe9a7d2ebacb063f8814e5974041161d839ef Mon Sep 17 00:00:00 2001 From: Terry Tan Yongsheng Date: Tue, 17 Dec 2024 11:09:38 +0800 Subject: [PATCH 34/69] Add README --- src/crewai_tools/tools/serpapi_tool/README.md | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 src/crewai_tools/tools/serpapi_tool/README.md diff --git a/src/crewai_tools/tools/serpapi_tool/README.md b/src/crewai_tools/tools/serpapi_tool/README.md new file mode 100644 index 000000000..d81b851f8 --- /dev/null +++ b/src/crewai_tools/tools/serpapi_tool/README.md @@ -0,0 +1,32 @@ +# SerpApi Tools + +## Description +[SerpApi](https://serpapi.com/) tools are built for searching information in the internet. It currently supports: +- Google Search +- Google Shopping + +To successfully make use of SerpApi tools, you have to have `SERPAPI_API_KEY` set in the environment. To get the API key, register a free account at [SerpApi](https://serpapi.com/). + +## Installation +To start using the SerpApi Tools, you must first install the `crewai_tools` package. This can be easily done with the following command: + +```shell +pip install 'crewai[tools]' +``` + +## Examples +The following example demonstrates how to initialize the tool + +### Google Search +```python +from crewai_tools import SerpApiGoogleSearchTool + +tool = SerpApiGoogleSearchTool() +``` + +### Google Shopping +```python +from crewai_tools import SerpApiGoogleShoppingTool + +tool = SerpApiGoogleShoppingTool() +``` From 81981e43b668dc1d2073fa1d7defbcfa97e452ac Mon Sep 17 00:00:00 2001 From: Terry Tan Yongsheng Date: Tue, 17 Dec 2024 13:45:50 +0800 Subject: [PATCH 35/69] Add type hints --- src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py b/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py index 57e33e71e..98491190c 100644 --- a/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py +++ b/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py @@ -1,10 +1,12 @@ import os import re -from typing import Optional, Any +from typing import Optional, Any, Union from crewai.tools import BaseTool class SerpApiBaseTool(BaseTool): + """Base class for SerpApi functionality with shared capabilities.""" + client: Optional[Any] = None def __init__(self, **kwargs): @@ -14,7 +16,7 @@ class SerpApiBaseTool(BaseTool): from serpapi import Client except ImportError: raise ImportError( - "`serpapi` package not found" + "`serpapi` package not found, please install with `pip install serpapi`" ) api_key = os.getenv("SERPAPI_API_KEY") if not api_key: @@ -23,7 +25,7 @@ class SerpApiBaseTool(BaseTool): ) self.client = Client(api_key=api_key) - def _omit_fields(self, data, omit_patterns): + def _omit_fields(self, data: Union[dict, list], omit_patterns: list[str]) -> None: if isinstance(data, dict): for field in list(data.keys()): if any(re.compile(p).match(field) for p in omit_patterns): @@ -34,4 +36,3 @@ class SerpApiBaseTool(BaseTool): elif isinstance(data, list): for item in data: self._omit_fields(item, omit_patterns) - From cd37ede869b3032ba191644a9eda613d2624ae30 Mon Sep 17 00:00:00 2001 From: Gilbert Bagaoisan Date: Mon, 16 Dec 2024 22:05:28 -0800 Subject: [PATCH 36/69] lint fixes --- .../tools/spider_tool/spider_tool.py | 135 ++++++++++++++---- 1 file changed, 106 insertions(+), 29 deletions(-) diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py 
b/src/crewai_tools/tools/spider_tool/spider_tool.py index 94da9f6fe..74fee809d 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -1,60 +1,137 @@ +import logging from typing import Any, Dict, Literal, Optional, Type +from urllib.parse import urlparse from crewai.tools import BaseTool from pydantic import BaseModel, Field +logger = logging.getLogger(__file__) + class SpiderToolSchema(BaseModel): - url: str = Field(description="Website URL") - params: Optional[Dict[str, Any]] = Field( - description="Set additional params. Options include:\n" - "- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n" - "- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n" - "- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n" - "- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n" + """Input schema for SpiderTool.""" + + website_url: str = Field( + ..., description="Mandatory website URL to scrape or crawl" ) mode: Literal["scrape", "crawl"] = Field( default="scrape", - description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set.", + description="The mode of the SpiderTool. The only two allowed modes are `scrape` or `crawl`. Crawl mode will follow up to 5 links and return their content in markdown format.", ) class SpiderTool(BaseTool): - name: str = "Spider scrape & crawl tool" - description: str = "Scrape & Crawl any url and return LLM-ready data." - args_schema: Type[BaseModel] = SpiderToolSchema - api_key: Optional[str] = None - spider: Optional[Any] = None + """Tool for scraping and crawling websites.""" + + DEFAULT_CRAWL_LIMIT: int = 5 + DEFAULT_RETURN_FORMAT: str = "markdown" + + name: str = "SpiderTool" + description: str = ( + "A tool to scrape or crawl a website and return LLM-ready content." + ) + args_schema: Type[BaseModel] = SpiderToolSchema + custom_params: Optional[Dict[str, Any]] = None + website_url: Optional[str] = None + api_key: Optional[str] = None + spider: Any = None + log_failures: bool = True + + def __init__( + self, + api_key: Optional[str] = None, + website_url: Optional[str] = None, + custom_params: Optional[Dict[str, Any]] = None, + log_failures: bool = True, + **kwargs, + ): + """Initialize SpiderTool for web scraping and crawling. + + Args: + api_key (Optional[str]): Spider API key for authentication. Required for production use. + website_url (Optional[str]): Default website URL to scrape/crawl. Can be overridden during execution. + custom_params (Optional[Dict[str, Any]]): Additional parameters to pass to Spider API. + These override any parameters set by the LLM. + log_failures (bool): If True, logs errors. Defaults to True. + **kwargs: Additional arguments passed to BaseTool. + + Raises: + ImportError: If spider-client package is not installed. + RuntimeError: If Spider client initialization fails. 
+ """ - def __init__(self, api_key: Optional[str] = None, **kwargs): super().__init__(**kwargs) + if website_url is not None: + self.website_url = website_url + + self.log_failures = log_failures + self.custom_params = custom_params + try: from spider import Spider # type: ignore + + self.spider = Spider(api_key=api_key) except ImportError: raise ImportError( "`spider-client` package not found, please run `pip install spider-client`" ) + except Exception as e: + raise RuntimeError(f"Failed to initialize Spider client: {str(e)}") - self.spider = Spider(api_key=api_key) + def _validate_url(self, url: str) -> bool: + """Validate URL format. + + Args: + url (str): URL to validate. + Returns: + bool: True if valid URL. + """ + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except Exception: + return False def _run( self, - url: str, - params: Optional[Dict[str, Any]] = None, - mode: Optional[Literal["scrape", "crawl"]] = "scrape", - ): - if mode not in ["scrape", "crawl"]: + website_url: str, + mode: Literal["scrape", "crawl"] = "scrape", + ) -> str: + params = {} + url = website_url or self.website_url + + if not self._validate_url(url): + raise ValueError("Invalid URL format") + + if not url: raise ValueError( - "Unknown mode in `mode` parameter, `scrape` or `crawl` are the allowed modes" + "Website URL must be provided either during initialization or execution" ) - # Ensure 'return_format': 'markdown' is always included - if params: - params["return_format"] = "markdown" - else: - params = {"return_format": "markdown"} + if mode not in ["scrape", "crawl"]: + raise ValueError("Mode must be either 'scrape' or 'crawl'") - action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url - spider_docs = action(url=url, params=params) + params["request"] = "smart" + params["filter_output_svg"] = True + params["return_format"] = self.DEFAULT_RETURN_FORMAT - return spider_docs + if mode == "crawl": + params["limit"] = self.DEFAULT_CRAWL_LIMIT + + # Update params with custom params if provided. + # This will override any params passed by LLM. + if self.custom_params: + params.update(self.custom_params) + + try: + action = ( + self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url + ) + return action(url=url, params=params) + + except Exception as e: + if self.log_failures: + logger.error(f"Error fetching data from {url}, exception: {e}") + return None + else: + raise e From 4551b8c6251754e6c67832af63d705ef9eb43cb1 Mon Sep 17 00:00:00 2001 From: Gilbert Bagaoisan Date: Mon, 16 Dec 2024 22:05:46 -0800 Subject: [PATCH 37/69] Updated readme --- src/crewai_tools/tools/spider_tool/README.md | 72 +++++++------------- 1 file changed, 24 insertions(+), 48 deletions(-) diff --git a/src/crewai_tools/tools/spider_tool/README.md b/src/crewai_tools/tools/spider_tool/README.md index 563c07a04..c2dc8826a 100644 --- a/src/crewai_tools/tools/spider_tool/README.md +++ b/src/crewai_tools/tools/spider_tool/README.md @@ -1,81 +1,57 @@ # SpiderTool ## Description - -[Spider](https://spider.cloud/?ref=crewai) is the [fastest](https://github.com/spider-rs/spider/blob/main/benches/BENCHMARKS.md#benchmark-results) open source scraper and crawler that returns LLM-ready data. It converts any website into pure HTML, markdown, metadata or text while enabling you to crawl with custom actions using AI. +[Spider](https://spider.cloud/?ref=crewai) is a high-performance web scraping and crawling tool that delivers optimized markdown for LLMs and AI agents. 
It intelligently switches between HTTP requests and JavaScript rendering based on page requirements. Perfect for both single-page scraping and website crawling—making it ideal for content extraction and data collection. ## Installation - -To use the Spider API you need to download the [Spider SDK](https://pypi.org/project/spider-client/) and the crewai[tools] SDK too: +To use the Spider API you need to download the [Spider SDK](https://pypi.org/project/spider-client/) and the crewai[tools] SDK, too: ```python pip install spider-client 'crewai[tools]' ``` ## Example - -This example shows you how you can use the Spider tool to enable your agent to scrape and crawl websites. The data returned from the Spider API is already LLM-ready, so no need to do any cleaning there. +This example shows you how you can use the Spider tool to enable your agent to scrape and crawl websites. The data returned from the Spider API is LLM-ready. ```python from crewai_tools import SpiderTool -def main(): - spider_tool = SpiderTool() - - searcher = Agent( - role="Web Research Expert", - goal="Find related information from specific URL's", - backstory="An expert web researcher that uses the web extremely well", - tools=[spider_tool], - verbose=True, - ) +# To enable scraping any website it finds during its execution +spider_tool = SpiderTool(api_key='YOUR_API_KEY') - return_metadata = Task( - description="Scrape https://spider.cloud with a limit of 1 and enable metadata", - expected_output="Metadata and 10 word summary of spider.cloud", - agent=searcher - ) +# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website +spider_tool = SpiderTool(website_url='https://www.example.com') - crew = Crew( - agents=[searcher], - tasks=[ - return_metadata, - ], - verbose=2 - ) - - crew.kickoff() - -if __name__ == "__main__": - main() +# Pass in custom parameters, see below for more details +spider_tool = SpiderTool( + website_url='https://www.example.com', + custom_params={"depth": 2, "anti_bot": True, "proxy_enabled": True} +) ``` ## Arguments - `api_key` (string, optional): Specifies Spider API key. If not specified, it looks for `SPIDER_API_KEY` in environment variables. -- `params` (object, optional): Optional parameters for the request. Defaults to `{"return_format": "markdown"}` to return the website's content in a format that fits LLMs better. +- `website_url` (string): The website URL. Will be used as a fallback if passed when the tool is initialized. +- `log_failures` (bool): Log scrape failures or fail silently. Defaults to `true`. +- `custom_params` (object, optional): Optional parameters for the request. + - `return_format` (string): The return format of the website's content. Defaults to `markdown`. - `request` (string): The request type to perform. Possible values are `http`, `chrome`, and `smart`. Use `smart` to perform an HTTP request by default until JavaScript rendering is needed for the HTML. - `limit` (int): The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages. - `depth` (int): The crawl limit for maximum depth. If `0`, no limit will be applied. - - `cache` (bool): Use HTTP caching for the crawl to speed up repeated runs. Default is `true`. - - `budget` (object): Object that has paths with a counter for limiting the amount of pages example `{"*":1}` for only crawling the root page. - `locale` (string): The locale to use for request, example `en-US`. 
- `cookies` (string): Add HTTP cookies to use for request. - `stealth` (bool): Use stealth mode for headless chrome request to help prevent being blocked. The default is `true` on chrome. - `headers` (object): Forward HTTP headers to use for all request. The object is expected to be a map of key value pairs. - - `metadata` (bool): Boolean to store metadata about the pages and content found. This could help improve AI interopt. Defaults to `false` unless you have the website already stored with the configuration enabled. - - `viewport` (object): Configure the viewport for chrome. Defaults to `800x600`. - - `encoding` (string): The type of encoding to use like `UTF-8`, `SHIFT_JIS`, or etc. + - `metadata` (bool): Boolean to store metadata about the pages and content found. Defaults to `false`. - `subdomains` (bool): Allow subdomains to be included. Default is `false`. - `user_agent` (string): Add a custom HTTP user agent to the request. By default this is set to a random agent. - - `store_data` (bool): Boolean to determine if storage should be used. If set this takes precedence over `storageless`. Defaults to `false`. - - `gpt_config` (object): Use AI to generate actions to perform during the crawl. You can pass an array for the `"prompt"` to chain steps. - - `fingerprint` (bool): Use advanced fingerprint for chrome. - - `storageless` (bool): Boolean to prevent storing any type of data for the request including storage and AI vectors embedding. Defaults to `false` unless you have the website already stored. - - `readability` (bool): Use [readability](https://github.com/mozilla/readability) to pre-process the content for reading. This may drastically improve the content for LLM usage. - `return_format` (string): The format to return the data in. Possible values are `markdown`, `raw`, `text`, and `html2text`. Use `raw` to return the default format of the page like HTML etc. - `proxy_enabled` (bool): Enable high performance premium proxies for the request to prevent being blocked at the network level. - - `query_selector` (string): The CSS query selector to use when extracting content from the markup. - - `full_resources` (bool): Crawl and download all the resources for a website. + - `css_extraction_map` (object): Use CSS or XPath selectors to scrape contents from the web page. Set the paths and the extraction object map to perform extractions per path or page. - `request_timeout` (int): The timeout to use for request. Timeouts can be from `5-60`. The default is `30` seconds. - - `run_in_background` (bool): Run the request in the background. Useful if storing data and wanting to trigger crawls to the dashboard. This has no effect if storageless is set. + - `return_headers` (bool): Return the HTTP response headers with the results. Defaults to `false`. + - `filter_output_main_only` (bool): Filter the nav, aside, and footer from the output. + - `headers` (object): Forward HTTP headers to use for all request. The object is expected to be a map of key value pairs. 
+ +Learn other parameters that can be used: [https://spider.cloud/docs/api](https://spider.cloud/docs/api) + From 3795d7dd8eca55d8311bc776ff00dcea916500fb Mon Sep 17 00:00:00 2001 From: Gilbert Bagaoisan Date: Mon, 16 Dec 2024 22:19:46 -0800 Subject: [PATCH 38/69] Reversed order of url validation --- src/crewai_tools/tools/spider_tool/spider_tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py b/src/crewai_tools/tools/spider_tool/spider_tool.py index 74fee809d..970ac8d64 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -100,14 +100,14 @@ class SpiderTool(BaseTool): params = {} url = website_url or self.website_url - if not self._validate_url(url): - raise ValueError("Invalid URL format") - if not url: raise ValueError( "Website URL must be provided either during initialization or execution" ) + if not self._validate_url(url): + raise ValueError("Invalid URL format") + if mode not in ["scrape", "crawl"]: raise ValueError("Mode must be either 'scrape' or 'crawl'") From 059d635f02916bfec4c4bb62d7b3f4000569055b Mon Sep 17 00:00:00 2001 From: Ho Trong Hien <115549171+hienhayho@users.noreply.github.com> Date: Tue, 17 Dec 2024 22:28:41 +0700 Subject: [PATCH 39/69] fix: fix pydantic validation error - When passing result_as_answer=True, it will return ToolOutput so it won't pass pydantic validation as a string - Get content of ToolOutput before return --- src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py index 61a747956..ba2605816 100644 --- a/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py +++ b/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py @@ -18,6 +18,10 @@ class LlamaIndexTool(BaseTool): from llama_index.core.tools import BaseTool as LlamaBaseTool tool = cast(LlamaBaseTool, self.llama_index_tool) + + if self.result_as_answer: + return tool(*args, **kwargs).content + return tool(*args, **kwargs) @classmethod From 73b803ddc3604efc5975de6863c737d80a8723aa Mon Sep 17 00:00:00 2001 From: Gilbert Bagaoisan Date: Tue, 17 Dec 2024 20:53:17 -0800 Subject: [PATCH 40/69] various improvements for PR based on recommendations --- .../tools/spider_tool/spider_tool.py | 133 +++++++++++++----- 1 file changed, 99 insertions(+), 34 deletions(-) diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py b/src/crewai_tools/tools/spider_tool/spider_tool.py index 970ac8d64..40959612f 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -1,6 +1,6 @@ import logging from typing import Any, Dict, Literal, Optional, Type -from urllib.parse import urlparse +from urllib.parse import unquote, urlparse from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -20,12 +20,28 @@ class SpiderToolSchema(BaseModel): ) -class SpiderTool(BaseTool): - """Tool for scraping and crawling websites.""" +class SpiderToolConfig(BaseModel): + """Configuration settings for SpiderTool. + Contains all default values and constants used by SpiderTool. + Centralizes configuration management for easier maintenance. 
+ """ + + # Crawling settings DEFAULT_CRAWL_LIMIT: int = 5 DEFAULT_RETURN_FORMAT: str = "markdown" + # Request parameters + DEFAULT_REQUEST_MODE: str = "smart" + FILTER_SVG: bool = True + + +class SpiderTool(BaseTool): + """Tool for scraping and crawling websites. + This tool provides functionality to either scrape a single webpage or crawl multiple + pages, returning content in a format suitable for LLM processing. + """ + name: str = "SpiderTool" description: str = ( "A tool to scrape or crawl a website and return LLM-ready content." @@ -36,6 +52,7 @@ class SpiderTool(BaseTool): api_key: Optional[str] = None spider: Any = None log_failures: bool = True + config: SpiderToolConfig = SpiderToolConfig() def __init__( self, @@ -79,16 +96,26 @@ class SpiderTool(BaseTool): raise RuntimeError(f"Failed to initialize Spider client: {str(e)}") def _validate_url(self, url: str) -> bool: - """Validate URL format. + """Validate URL format and security constraints. Args: - url (str): URL to validate. + url (str): URL to validate. Must be a properly formatted HTTP(S) URL + Returns: - bool: True if valid URL. + bool: True if URL is valid and meets security requirements, False otherwise. """ try: - result = urlparse(url) - return all([result.scheme, result.netloc]) + url = url.strip() + decoded_url = unquote(url) + + result = urlparse(decoded_url) + if not all([result.scheme, result.netloc]): + return False + + if result.scheme not in ["http", "https"]: + return False + + return True except Exception: return False @@ -96,42 +123,80 @@ class SpiderTool(BaseTool): self, website_url: str, mode: Literal["scrape", "crawl"] = "scrape", - ) -> str: - params = {} - url = website_url or self.website_url + ) -> Optional[str]: + """Execute the spider tool to scrape or crawl the specified website. - if not url: - raise ValueError( - "Website URL must be provided either during initialization or execution" - ) + Args: + website_url (str): The URL to process. Must be a valid HTTP(S) URL. + mode (Literal["scrape", "crawl"]): Operation mode. + - "scrape": Extract content from single page + - "crawl": Follow links and extract content from multiple pages - if not self._validate_url(url): - raise ValueError("Invalid URL format") + Returns: + Optional[str]: Extracted content in markdown format, or None if extraction fails + and log_failures is True. - if mode not in ["scrape", "crawl"]: - raise ValueError("Mode must be either 'scrape' or 'crawl'") - - params["request"] = "smart" - params["filter_output_svg"] = True - params["return_format"] = self.DEFAULT_RETURN_FORMAT - - if mode == "crawl": - params["limit"] = self.DEFAULT_CRAWL_LIMIT - - # Update params with custom params if provided. - # This will override any params passed by LLM. - if self.custom_params: - params.update(self.custom_params) + Raises: + ValueError: If URL is invalid or missing, or if mode is invalid. + ImportError: If spider-client package is not properly installed. + ConnectionError: If network connection fails while accessing the URL. + Exception: For other runtime errors. + """ try: + params = {} + url = website_url or self.website_url + + if not url: + raise ValueError( + "Website URL must be provided either during initialization or execution" + ) + + if not self._validate_url(url): + raise ValueError(f"Invalid URL format: {url}") + + if mode not in ["scrape", "crawl"]: + raise ValueError( + f"Invalid mode: {mode}. 
Must be either 'scrape' or 'crawl'"
+                )
+
+            params = {
+                "request": self.config.DEFAULT_REQUEST_MODE,
+                "filter_output_svg": self.config.FILTER_SVG,
+                "return_format": self.config.DEFAULT_RETURN_FORMAT,
+            }
+
+            if mode == "crawl":
+                params["limit"] = self.config.DEFAULT_CRAWL_LIMIT
+
+            if self.custom_params:
+                params.update(self.custom_params)
+
             action = (
                 self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
             )
             return action(url=url, params=params)

+        except ValueError as ve:
+            if self.log_failures:
+                logger.error(f"Validation error for URL {url}: {str(ve)}")
+                return None
+            raise ve
+
+        except ImportError as ie:
+            logger.error(f"Spider client import error: {str(ie)}")
+            raise ie
+
+        except ConnectionError as ce:
+            if self.log_failures:
+                logger.error(f"Connection error while accessing {url}: {str(ce)}")
+                return None
+            raise ce
+
         except Exception as e:
             if self.log_failures:
-                logger.error(f"Error fetching data from {url}, exception: {e}")
+                logger.error(
+                    f"Unexpected error during {mode} operation on {url}: {str(e)}"
+                )
                 return None
-            else:
-                raise e
+            raise e

From 1bbac87e70cfe2fb71a3d5a5a5ec2af13bebbdaf Mon Sep 17 00:00:00 2001
From: Gilbert Bagaoisan
Date: Tue, 17 Dec 2024 20:54:07 -0800
Subject: [PATCH 41/69] =?UTF-8?q?Improved=20readme=20based=20on=20recommen?=
 =?UTF-8?q?dations=E2=80=94added=20more=20advanced=20usage=20examples?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/crewai_tools/tools/spider_tool/README.md | 34 ++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/crewai_tools/tools/spider_tool/README.md b/src/crewai_tools/tools/spider_tool/README.md
index c2dc8826a..482c7c830 100644
--- a/src/crewai_tools/tools/spider_tool/README.md
+++ b/src/crewai_tools/tools/spider_tool/README.md
@@ -20,13 +20,43 @@ from crewai_tools import SpiderTool
 spider_tool = SpiderTool(api_key='YOUR_API_KEY')

 # Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
-spider_tool = SpiderTool(website_url='https://www.example.com')
+spider_tool = SpiderTool(website_url='https://spider.cloud')

 # Pass in custom parameters, see below for more details
 spider_tool = SpiderTool(
-    website_url='https://www.example.com',
+    website_url='https://spider.cloud',
     custom_params={"depth": 2, "anti_bot": True, "proxy_enabled": True}
 )
+
+# Advanced usage: use a CSS query selector to extract content
+css_extraction_map = {
+    "/": [ # pass in path (main index in this case)
+        {
+            "name": "headers", # give it a name for this element
+            "selectors": [
+                "h1"
+            ]
+        }
+    ]
+}
+
+spider_tool = SpiderTool(
+    website_url='https://spider.cloud',
+    custom_params={"anti_bot": True, "proxy_enabled": True, "metadata": True, "css_extraction_map": css_extraction_map}
+)
+
+### Response (extracted text will be in the metadata)
+"css_extracted": {
+    "headers": [
+        "The Web Crawler for AI Agents and LLMs!"
+    ]
+}
+```
+## Agent setup
+```yaml
+researcher:
+  role: >
+    You're a researcher tasked with researching a website and its content (use crawl mode). The website to crawl is: {website_url}.
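+  # Illustrative continuation with assumed fields (not part of the original patch):
+  goal: >
+    Produce a concise summary of the crawled content of {website_url}.
+  backstory: >
+    An experienced web researcher who relies on SpiderTool for LLM-ready content.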
``` ## Arguments From c070ba002c0d1f96087a53ed89a6963ba8d4b7ac Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 18 Dec 2024 14:34:40 +0100 Subject: [PATCH 42/69] feat: integration of scrapegraph APIs --- .../tools/scrapegraph_scrape_tool/README.md | 43 ++++++++++ .../scrapegraph_scrape_tool.py | 82 +++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 src/crewai_tools/tools/scrapegraph_scrape_tool/README.md create mode 100644 src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py diff --git a/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md b/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md new file mode 100644 index 000000000..76f385831 --- /dev/null +++ b/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md @@ -0,0 +1,43 @@ +# ScrapegraphScrapeTool + +## Description +A tool that leverages Scrapegraph AI's SmartScraper API to intelligently extract content from websites. This tool provides advanced web scraping capabilities with AI-powered content extraction, making it ideal for targeted data collection and content analysis tasks. + +## Installation +Install the required packages: +```shell +pip install 'crewai[tools]' +``` + +## Example +```python +from crewai_tools import ScrapegraphScrapeTool + +# Basic usage with API key +tool = ScrapegraphScrapeTool(api_key="your_api_key") +result = tool.run( + website_url="https://www.example.com", + user_prompt="Extract the main heading and summary" +) + +# Initialize with a fixed website URL +tool = ScrapegraphScrapeTool( + website_url="https://www.example.com", + api_key="your_api_key" +) +result = tool.run() + +# With custom prompt +tool = ScrapegraphScrapeTool( + api_key="your_api_key", + user_prompt="Extract all product prices and descriptions" +) +``` + +## Arguments +- `website_url`: The URL of the website to scrape (required if not set during initialization) +- `user_prompt`: Custom instructions for content extraction (optional) +- `api_key`: Your Scrapegraph API key (required, can be set via SCRAPEGRAPH_API_KEY environment variable) + +## Environment Variables +- `SCRAPEGRAPH_API_KEY`: Your Scrapegraph API key diff --git a/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py b/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py new file mode 100644 index 000000000..058af4150 --- /dev/null +++ b/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py @@ -0,0 +1,82 @@ +import os +from typing import Any, Optional, Type + +from crewai.tools import BaseTool +from pydantic import BaseModel, Field +from scrapegraph_py import Client +from scrapegraph_py.logger import sgai_logger + + +class FixedScrapegraphScrapeToolSchema(BaseModel): + """Input for ScrapegraphScrapeTool when website_url is fixed.""" + + pass + + +class ScrapegraphScrapeToolSchema(FixedScrapegraphScrapeToolSchema): + """Input for ScrapegraphScrapeTool.""" + + website_url: str = Field(..., description="Mandatory website url to scrape") + user_prompt: str = Field( + default="Extract the main content of the webpage", + description="Prompt to guide the extraction of content", + ) + + +class ScrapegraphScrapeTool(BaseTool): + name: str = "Scrapegraph website scraper" + description: str = "A tool that uses Scrapegraph AI to intelligently scrape website content." 
+ args_schema: Type[BaseModel] = ScrapegraphScrapeToolSchema + website_url: Optional[str] = None + user_prompt: Optional[str] = None + api_key: Optional[str] = None + + def __init__( + self, + website_url: Optional[str] = None, + user_prompt: Optional[str] = None, + api_key: Optional[str] = None, + **kwargs, + ): + super().__init__(**kwargs) + self.api_key = api_key or os.getenv("SCRAPEGRAPH_API_KEY") + + if not self.api_key: + raise ValueError("Scrapegraph API key is required") + + if website_url is not None: + self.website_url = website_url + self.description = f"A tool that uses Scrapegraph AI to intelligently scrape {website_url}'s content." + self.args_schema = FixedScrapegraphScrapeToolSchema + + if user_prompt is not None: + self.user_prompt = user_prompt + + # Configure logging + sgai_logger.set_logging(level="INFO") + + def _run( + self, + **kwargs: Any, + ) -> Any: + website_url = kwargs.get("website_url", self.website_url) + user_prompt = kwargs.get("user_prompt", self.user_prompt) or "Extract the main content of the webpage" + + if not website_url: + raise ValueError("website_url is required") + + # Initialize the client + sgai_client = Client(api_key=self.api_key) + + try: + # Make the SmartScraper request + response = sgai_client.smartscraper( + website_url=website_url, + user_prompt=user_prompt, + ) + + # Return the result + return response["result"] + finally: + # Always close the client + sgai_client.close() From 7608944e7f0e60f597e39fc2f40fc93fe31c4e28 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 18 Dec 2024 14:38:34 +0100 Subject: [PATCH 43/69] Update README.md --- src/crewai_tools/tools/scrapegraph_scrape_tool/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md b/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md index 76f385831..03467faee 100644 --- a/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md +++ b/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md @@ -40,4 +40,4 @@ tool = ScrapegraphScrapeTool( - `api_key`: Your Scrapegraph API key (required, can be set via SCRAPEGRAPH_API_KEY environment variable) ## Environment Variables -- `SCRAPEGRAPH_API_KEY`: Your Scrapegraph API key +- `SCRAPEGRAPH_API_KEY`: Your Scrapegraph API key, you can buy it [here](https://scrapegraphai.com) From b58d80dcf9373099ecc1bbc2715b6d042e8396ca Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 18 Dec 2024 14:42:37 +0100 Subject: [PATCH 44/69] update documents according to suggestions --- .../tools/scrapegraph_scrape_tool/README.md | 45 +++++++++++- .../scrapegraph_scrape_tool.py | 73 ++++++++++++++++++- 2 files changed, 112 insertions(+), 6 deletions(-) diff --git a/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md b/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md index 03467faee..e006c0ff9 100644 --- a/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md +++ b/src/crewai_tools/tools/scrapegraph_scrape_tool/README.md @@ -9,7 +9,9 @@ Install the required packages: pip install 'crewai[tools]' ``` -## Example +## Example Usage + +### Basic Usage ```python from crewai_tools import ScrapegraphScrapeTool @@ -19,19 +21,40 @@ result = tool.run( website_url="https://www.example.com", user_prompt="Extract the main heading and summary" ) +``` +### Fixed Website URL +```python # Initialize with a fixed website URL tool = ScrapegraphScrapeTool( website_url="https://www.example.com", api_key="your_api_key" ) result = tool.run() +``` +### Custom Prompt 
+```python # With custom prompt tool = ScrapegraphScrapeTool( api_key="your_api_key", user_prompt="Extract all product prices and descriptions" ) +result = tool.run(website_url="https://www.example.com") +``` + +### Error Handling +```python +try: + tool = ScrapegraphScrapeTool(api_key="your_api_key") + result = tool.run( + website_url="https://www.example.com", + user_prompt="Extract the main heading" + ) +except ValueError as e: + print(f"Configuration error: {e}") # Handles invalid URLs or missing API keys +except RuntimeError as e: + print(f"Scraping error: {e}") # Handles API or network errors ``` ## Arguments @@ -40,4 +63,22 @@ tool = ScrapegraphScrapeTool( - `api_key`: Your Scrapegraph API key (required, can be set via SCRAPEGRAPH_API_KEY environment variable) ## Environment Variables -- `SCRAPEGRAPH_API_KEY`: Your Scrapegraph API key, you can buy it [here](https://scrapegraphai.com) +- `SCRAPEGRAPH_API_KEY`: Your Scrapegraph API key, you can obtain one [here](https://scrapegraphai.com) + +## Rate Limiting +The Scrapegraph API has rate limits that vary based on your subscription plan. Consider the following best practices: +- Implement appropriate delays between requests when processing multiple URLs +- Handle rate limit errors gracefully in your application +- Check your API plan limits on the Scrapegraph dashboard + +## Error Handling +The tool may raise the following exceptions: +- `ValueError`: When API key is missing or URL format is invalid +- `RuntimeError`: When scraping operation fails (network issues, API errors) +- `RateLimitError`: When API rate limits are exceeded + +## Best Practices +1. Always validate URLs before making requests +2. Implement proper error handling as shown in examples +3. Consider caching results for frequently accessed pages +4. Monitor your API usage through the Scrapegraph dashboard diff --git a/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py b/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py index 058af4150..906bf6376 100644 --- a/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py +++ b/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py @@ -1,15 +1,25 @@ import os from typing import Any, Optional, Type +from urllib.parse import urlparse from crewai.tools import BaseTool -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, validator from scrapegraph_py import Client from scrapegraph_py.logger import sgai_logger +class ScrapegraphError(Exception): + """Base exception for Scrapegraph-related errors""" + pass + + +class RateLimitError(ScrapegraphError): + """Raised when API rate limits are exceeded""" + pass + + class FixedScrapegraphScrapeToolSchema(BaseModel): """Input for ScrapegraphScrapeTool when website_url is fixed.""" - pass @@ -22,8 +32,28 @@ class ScrapegraphScrapeToolSchema(FixedScrapegraphScrapeToolSchema): description="Prompt to guide the extraction of content", ) + @validator('website_url') + def validate_url(cls, v): + """Validate URL format""" + try: + result = urlparse(v) + if not all([result.scheme, result.netloc]): + raise ValueError + return v + except Exception: + raise ValueError("Invalid URL format. URL must include scheme (http/https) and domain") + class ScrapegraphScrapeTool(BaseTool): + """ + A tool that uses Scrapegraph AI to intelligently scrape website content. 
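+
+    Example (illustrative, mirroring the README usage above):
+        >>> tool = ScrapegraphScrapeTool(api_key="your_api_key")
+        >>> result = tool.run(
+        ...     website_url="https://www.example.com",
+        ...     user_prompt="Extract the main heading and summary",
+        ... )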
+ + Raises: + ValueError: If API key is missing or URL format is invalid + RateLimitError: If API rate limits are exceeded + RuntimeError: If scraping operation fails + """ + name: str = "Scrapegraph website scraper" description: str = "A tool that uses Scrapegraph AI to intelligently scrape website content." args_schema: Type[BaseModel] = ScrapegraphScrapeToolSchema @@ -45,6 +75,7 @@ class ScrapegraphScrapeTool(BaseTool): raise ValueError("Scrapegraph API key is required") if website_url is not None: + self._validate_url(website_url) self.website_url = website_url self.description = f"A tool that uses Scrapegraph AI to intelligently scrape {website_url}'s content." self.args_schema = FixedScrapegraphScrapeToolSchema @@ -55,6 +86,32 @@ class ScrapegraphScrapeTool(BaseTool): # Configure logging sgai_logger.set_logging(level="INFO") + @staticmethod + def _validate_url(url: str) -> None: + """Validate URL format""" + try: + result = urlparse(url) + if not all([result.scheme, result.netloc]): + raise ValueError + except Exception: + raise ValueError("Invalid URL format. URL must include scheme (http/https) and domain") + + def _handle_api_response(self, response: dict) -> str: + """Handle and validate API response""" + if not response: + raise RuntimeError("Empty response from Scrapegraph API") + + if "error" in response: + error_msg = response.get("error", {}).get("message", "Unknown error") + if "rate limit" in error_msg.lower(): + raise RateLimitError(f"Rate limit exceeded: {error_msg}") + raise RuntimeError(f"API error: {error_msg}") + + if "result" not in response: + raise RuntimeError("Invalid response format from Scrapegraph API") + + return response["result"] + def _run( self, **kwargs: Any, @@ -65,6 +122,9 @@ class ScrapegraphScrapeTool(BaseTool): if not website_url: raise ValueError("website_url is required") + # Validate URL format + self._validate_url(website_url) + # Initialize the client sgai_client = Client(api_key=self.api_key) @@ -75,8 +135,13 @@ class ScrapegraphScrapeTool(BaseTool): user_prompt=user_prompt, ) - # Return the result - return response["result"] + # Handle and validate the response + return self._handle_api_response(response) + + except RateLimitError: + raise # Re-raise rate limit errors + except Exception as e: + raise RuntimeError(f"Scraping failed: {str(e)}") finally: # Always close the client sgai_client.close() From 8d8c3677ff372bca8b9d92fcd25d477f7956843f Mon Sep 17 00:00:00 2001 From: Pedro Pereira Date: Wed, 18 Dec 2024 18:23:18 +0100 Subject: [PATCH 45/69] feat: add optional return_html flag to SeleniumScrapingTool --- .../selenium_scraping_tool.py | 45 +++++++++++++++---- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py index 47910f35b..5f7d9391b 100644 --- a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py +++ b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py @@ -11,8 +11,6 @@ from selenium.webdriver.common.by import By class FixedSeleniumScrapingToolSchema(BaseModel): """Input for SeleniumScrapingTool.""" - pass - class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema): """Input for SeleniumScrapingTool.""" @@ -33,6 +31,7 @@ class SeleniumScrapingTool(BaseTool): cookie: Optional[dict] = None wait_time: Optional[int] = 3 css_element: Optional[str] = None + return_html: Optional[bool] = False def __init__( self, @@ -63,18 +62,46 @@ class 
SeleniumScrapingTool(BaseTool): ) -> Any: website_url = kwargs.get("website_url", self.website_url) css_element = kwargs.get("css_element", self.css_element) + return_html = kwargs.get("return_html", self.return_html) driver = self._create_driver(website_url, self.cookie, self.wait_time) - content = [] - if css_element is None or css_element.strip() == "": - body_text = driver.find_element(By.TAG_NAME, "body").text - content.append(body_text) - else: - for element in driver.find_elements(By.CSS_SELECTOR, css_element): - content.append(element.text) + content = self._get_content(driver, css_element, return_html) driver.close() + return "\n".join(content) + def _get_content(self, driver, css_element, return_html): + content = [] + + if self._is_css_element_empty(css_element): + content.append(self._get_body_content(driver, return_html)) + else: + content.extend(self._get_elements_content(driver, css_element, return_html)) + + return content + + def _is_css_element_empty(self, css_element): + return css_element is None or css_element.strip() == "" + + def _get_body_content(self, driver, return_html): + body_element = driver.find_element(By.TAG_NAME, "body") + + return ( + body_element.get_attribute("outerHTML") + if return_html + else body_element.text + ) + + def _get_elements_content(self, driver, css_element, return_html): + elements_content = [] + + for element in driver.find_elements(By.CSS_SELECTOR, css_element): + elements_content.append( + element.get_attribute("outerHTML") if return_html else element.text + ) + + return elements_content + def _create_driver(self, url, cookie, wait_time): options = Options() options.add_argument("--headless") From 4c5f1962ace1f5ad4fe628f7cb2a33cf19753783 Mon Sep 17 00:00:00 2001 From: juliette_sivan Date: Thu, 19 Dec 2024 14:07:36 +0100 Subject: [PATCH 46/69] add linkup tool --- src/crewai_tools/tools/linkup/README.md | 98 ++++++++++++++++++ src/crewai_tools/tools/linkup/assets/icon.png | Bin 0 -> 32966 bytes .../tools/linkup/linkup_search_tool.py | 36 +++++++ 3 files changed, 134 insertions(+) create mode 100644 src/crewai_tools/tools/linkup/README.md create mode 100644 src/crewai_tools/tools/linkup/assets/icon.png create mode 100644 src/crewai_tools/tools/linkup/linkup_search_tool.py diff --git a/src/crewai_tools/tools/linkup/README.md b/src/crewai_tools/tools/linkup/README.md new file mode 100644 index 000000000..c51946a11 --- /dev/null +++ b/src/crewai_tools/tools/linkup/README.md @@ -0,0 +1,98 @@ +# Linkup Search Tool + +## Description + +The `LinkupSearchTool` is a tool designed for integration with the CrewAI framework. It provides the ability to query the Linkup API for contextual information and retrieve structured results. This tool is ideal for enriching workflows with up-to-date and reliable information from Linkup. + +--- + +## Features + +- Perform API queries to the Linkup platform using customizable parameters (`query`, `depth`, `output_type`). +- Gracefully handles API errors and provides structured feedback. +- Returns well-structured results for seamless integration into CrewAI processes. + +--- + +## Installation + +### Prerequisites + +- Linkup API Key + +### Steps + +1. ```shell + pip install 'crewai[tools]' + ``` + +2. Create a `.env` file in your project root and add your Linkup API Key: + ```plaintext + LINKUP_API_KEY=your_linkup_api_key + ``` + +--- + +## Usage + +### Basic Example + +Here is how to use the `LinkupSearchTool` in a CrewAI project: + +1. 
**Import and Initialize**: + ```python + from tools.linkup_tools import LinkupSearchTool + import os + from dotenv import load_dotenv + + load_dotenv() + + linkup_tool = LinkupSearchTool(api_key=os.getenv("LINKUP_API_KEY")) + ``` + +2. **Set Up an Agent and Task**: + ```python + from crewai import Agent, Task, Crew + + # Define the agent + research_agent = Agent( + role="Information Researcher", + goal="Fetch relevant results from Linkup.", + backstory="An expert in online information retrieval...", + tools=[linkup_tool], + verbose=True + ) + + # Define the task + search_task = Task( + expected_output="A detailed list of Nobel Prize-winning women in physics with their achievements.", + description="Search for women who have won the Nobel Prize in Physics.", + agent=research_agent + ) + + # Create and run the crew + crew = Crew( + agents=[research_agent], + tasks=[search_task] + ) + + result = crew.kickoff() + print(result) + ``` + +### Advanced Configuration + +You can customize the parameters for the `LinkupSearchTool`: + +- `query`: The search term or phrase. +- `depth`: The search depth (`"standard"` by default). +- `output_type`: The type of output (`"searchResults"` by default). + +Example: +```python +response = linkup_tool._run( + query="Women Nobel Prize Physics", + depth="standard", + output_type="searchResults" +) +``` \ No newline at end of file diff --git a/src/crewai_tools/tools/linkup/assets/icon.png b/src/crewai_tools/tools/linkup/assets/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..4848d4c6b19b1da998326dbd6a3efdf3671a48a3 GIT binary patch literal 32966 zcmX_HbwHEt*WL!BJ4Q-3qa-C2DFs15fg!PtR9aGUq%=x7Bn1VO8Xy9KFj}Pql#m*r zfKmgbVf*&Jzt``N#Xt9R-zV;Ku5+F9B;7VOq^IGg0RRB>H;nWw0045*UmzJ31?i`% zJ}4Og5CGiJyLKm}V6!mz9D4T=aj|VNXeZvdRq2<6(1&C&Ik*qi-2EKO(dgIO$hnLop7P7v-E^>WlE-rS5_VXL}3+D@GNT2zOcE`F$Ticu2 zii&O{S4RDdEUx`|eiIMNfK<>70eC>hWk1KTWvCBji}%Q+OE1JxGG)XFqDORF8Bsi% z7RBwWU;7aH2(ZPTj(L%#TyS1>tHdJ)2hBX#RNu=`oz(mjDSqj)g*Q zjDkv5r-m<;uK-3`#mNjgtDg3m%qYtAK&<$NloDSZgJzs0nJIV#;7wjjwi@dt_T&}n zzmufWSpe7`)6RIWc2Z*$RobGc@Xl~)laI9qvqKhwU#M*8L`UsC;DFK9@E+KlTu&m>&%AQjIalNkas`r^({)`?PSps2h`Czk?nE|cR_ z1a=4Ih?oQ(>CMVw)X*>fTTi9%a7d(spX{8C_RU(%uBOV$P9x#P7rVn(5NIHr-d7AS zw!BG}H6{ek{JY~*S8g?yW6|80QMZ1MMDW_nt6J_|9hNRwyFV8Q94{^n>pB);^ZlDW z+y6{OC3sWM{nW9KoVW;LWTO4F4=wt|2qxMVKXZ@(K}tQsIceFBZB3Xh(zIRZ1^#Cl zCZxzT<1yHmV>dPGIc||LS|{aKq{YeYw-z`-oD%f#8kZB#zkcw~S1TIO0h$|J=89t6 zvwr(!QM#B=I%=M>J(z@r7%~5S;wydE+2PknAVHgaldYzz!t>w^V!;UFd_(Iw8Mg%!1|-; zPkrt0!VPgd(OC-M9=nu>L5*|tylIl;<)!|{Ga`Sr2M=+MLLEepX3CNW3m=af4789P z3*#BlGupk1#~&c+7u?cat&?O7zz$kFN=+l)A&jLy%8eF((Mb+X8Cl~Z`+oYxt_;89Wa^=$0{Q8%a&mX`u(P%-D>2PO|;pNt$_Ea6w9|KwK@GOH1T` zSBy#6R3h9IDA#R5Xk#bzMEGE1mbBsxUg)&aaa)60e6`6u8ND$Lo| z8v9?r2~DPbVH|CSjKu2?pE`oW6Q+ARbMTH3QB)Rm;uIT(;X1!!LM?C1t13{1*I}$q zI@I9v*8gepX>pEI0E$B_B{u|2cQkygk0)OK51vn0%3PssJt&>z<5&}lJpFI z8QLzlTr|L=Y)7Z7noi7p$mp-*=mzLpd|Y7NKML;ps?!Lioi;gqzJ4}ugTv0ptcZ`Q zdeQ&}230RzL@MD0hYEBGW{O`mu?TowVVqV~>&OQD73RGd8!`hR`>HLEsGi%ui6b)^OKKG^uE$~3uG|huO&*Q9jNDw zS5gl&XM~AiBIm3aK-66*k{}}19wc;3Qi(vC)aogo9T|UfUFzvKZrn8-JKfQTzk$LW zw2+jy*~YXJ^I&o&O7$d4P-TBBKz&6=iUuHeMq!iOsc|Et;>lfV{8Q~*`~NOyFO@l1 z`}1<;pCva%QIs?d1VscNzVLg;pWuj+dma&O3FBoHsA7RL9NTr$%KY_SwcV0H`2CYr zcexNTw<-^G*wstNmsK_#U!9~9)mRsNFkuIQkOAPwGV)`!wEJxFfA3>?JH+Oe%8SOl z8RB4>Q`7z~he8$_N3cRhYuB-qq)P(3>B#?@gZztxNC#U8YV-B4%x+?hO=#x>Ur$Lu zgJoZ~m?2s7Q+sNr_YT$I1$JJuBiMh^^cVQ%hJZqVzWibVuJn2w-u^Neoix|2NIufS zm7A#R(QQeV;CF)zOz#=y9>Wmp+O+>EAFzDBYY33O?=`t-0=!LWN*3f?kCCKBjrhJA 
zD#GR{jqEjt@$tnXWPIt~ul+rue6%k=P6$Gq2de>$kn>%C?<)zux^{dq!>U$1k_`w( ze!i3#tMLEHj4U(eC8G#soV*0Ar~X>^_rcliNdm+3Q{<=U z+=o8?b#J7eUvX41c!BEp%CGLI%k1_N=Vwt-ezXAmVF+GA1B%jLoqD)YKPl%`%RN5%cj``Vz(3wB!T(AU|C*}Y=lFA6GVF^p8u%X{8zq)pv!GCoj+)s|DeNK7c(KN@P zDXDI1;eIEZNFZ6(dfdHhOe*NQ`@G*3%-sNJv((>fp&%5VZGh-3B(_LXLQrL8P9nqo zIjcUtCi=zvr5F1qXFHVBiqEf(8vi|7!Ws)@%wXeJ`QlwyBcObUD=O>b>gT8DMB(01 zNfYKgN-dxNIZf|J%b@6r80ts&$f7{+)^VJUpoN$dbjn@>7|#PQR+;^Fzq)w?C5*wP zd(SN0j0sYzA|7OWWp0R-x>!TouI2QHfQuX+s`zd0e=jimZ~IW)8Fm0G+BX2?OLsCD zCWQy#ih57+YAhWTgCgYxui?LdQDmOnHO7PExKU}gp*nJgff&tDC`w86s_5>oGx6qq zG-!iNgDeb~(VfjY7(E|NALI11UI4~%Da_^0b{onVbr%a8Qx17#_qs$~>VL-Pq+LJ_ zm#!*bID$6|A8O_uKYJ#HXW4w@_KAk+J$ZqiCqd_$Cp}Jqzgsl?&=PwY&WLNma-fQl z*MzfviQWT?dFIA66o%bwF{7_)76%f^HJPWGzFGdKCr*rpuT0^|MJKv1CM*1WaFhkN zSid$66}$SpS zUA!?k8gUXx-0in_6vKond|SGF!>%qx@5L2cqC&o zo=fd(B#pU4_+*r%%9Vyk&jzZTJ|8$bU8F8Vk@RM-z#7q!8Zz0O14fO09nmiq2X5#O ze4!(*V|09le<>1Rt{acN40LuZda^GmL*I3M!dfO$Moz42ajI$(;wdMf$dhp2@TNLl zI!f=FM(I@*tBNuD+N_^~Casl6j#PpP*_ZRE>w3bEGK9BnY;)>jhP_Qe*1%#A-?hSV z30%yE1&#%GecJQ7xbA?!@9PdrZ+N88d(|=2k#Yc2@`XzmIvF2?L*@D@qb84-2U^#I z9+gD(ap#Rvprqp+Q=nbVlIOLKqp#IBvay&eL2e5_!t$cT2 zzx{`o1Ft1SR1*85{p=w%M)vJv2=#qQ@y1L@V<~HR=a&4dPm$9afjWn^A#-F3d?x00 zw~g{nX%yPyt^u?X9V>PMrpsG4Na!?Cu+)Txl#QC5KsR%*Rr&Z!t)AHdCvVGMGk+`g zNAz%19j}J_iB&|+NB@j~WbbR!=FpoJGNM(`H`|f;-wtnJ1YOgGFTpe0FdP30ft;` z09FzTl`dO^#TU@v6>+J(>I#}IB@s%HGlUOZc#pPCpLry8j@~Y=Pr!z{AReq^%j)It z^QSepA08_hRkCZV+1SOlx*#9_YG;-@WinF2_2P+2&-kYc{N5&AIJFJ`F<*Vmi<=65 zRR86&>sLve-UImvcYX<0#v>s4O-T<|6n||viT(EJ5@It`Y;l8;$@VTA*#QdDz_;(E zfGY~b(+lRPU$}JjBoVC))-DuW1u=ALZ}9;EnXjI znfzrt&hGq?V{&}PYarckev#XNG{k6XT;bo^~A(Wpsz3%XC;u3ao z>{8O%Q|He7;MPJldU)RD8`PxI7$hd&<-vZUUd-()#vRT;_M!v>3K3Qb!O#5hepa1a zW*acVBi}c6*cF$}Q#7exF(f{sN_)JwonT(L*7GL?)t`_KnxZbc6r2#?ZBda(xRycEC=Zyyil zptBUp`9O&CR=fSD{lZ)ZX#9a{4p`C6RIb=MnV6_ zL$*(LsgxD~lB-Ct-$M|-JQ{vZl>WXL-`qg28&6-WO6fpvXS^S+*L)LIT?<;;GdHQ^ z>knm}JedLpkA=uJFf6+&xL_ZX&=luRy!(inG$0pv2H!>0!u>QFC{p%!AyC-_a!!TM zM!0EV& z4Io?jHCl&^r)-AUppbSlMTk3wv9mu42l7LD z^vpgb=9n|G`1^-@FLYOnxg46Q>d}7Y05vnPx0ttF0|l48Ksw`|2Ao#pOX8R=%|GSI zE4a=wgMMmtF_oV)vDIA?*}oV+{)kipIYU#h zu3abM@}ekO!85qx*?w33_iwM8unRJ^ew5~7~=zMqj>^ALH%FNB5}N_!>bA9%fssVSCC6T^kXHe0oAd=(MTr~&Q8uS z*?ETd5sOW|H{;YH^8`!mM5(_SLbvm5Q~ctn0#msd_zshwE>MNVX>+QcUT+ zng@zBQhCwa3$%?PTvMO{2CB_rw@lI-L zM>`pPAhLv9s;!ZWs{yqLaXK0**B5?@De?KPJ9*MTZO$z@Y1EP{FB^azx~)H*5HJfl z343@uN>)FyNEl3F%o3ybY@l12jN{+wC+#F2Nj^{G61hj_NWNX-SF&krg=qWsP#BWv z?S@>8Y}Pa?|7|=8CTdZ)3rjfWUjBx1WT74DNtFh^L686F>xC$76^*HHMzm3enYn=F z=TaL$*u`%@ZFn;@X*z>aXbpCLjlQB9^M45-A(;BaNRec0^ON(~V$*|q{}DbmfdE9@g?IYa#aM6ks~ohVj=e7YAz-D}9Hc8+x5$*3Puy4gvWP+kgs zcOmf)A?KW|=soj)Hb(wfe+I(+MBo1PmZ$m_*&4_V-krKGgU|}0BvE@)X#U8YzL*x; zUogt~qWm+4p|3>h=~SXj?`mS?(94{wkZ_{9YrQXj+CllZ`Zrih3@2re_PUwFS|HZU zo$bl^w*K*=?3fTOz})o>dXFL5+T!&nGYLfZejJwuUsV&BSgx1d1xOS@cg|8xx zSAcQbjC>L@gNn4p164^ov24K7)aq{JV>Xx$DHf8X)J=R^#t#9ElD!k=t8C)7I~f`* zj-GgmACp@kx1-U_F;F}fI`@d8+N5k}%+E6$ufg&2xp6lt9Rq#kQe-&B=cwll39rqs z7$Oek?tsq0$9%hiO<_gKIC=Q>KZK;cJ$cC62KV3SWmq5yEt1T6Lrw-DrhwQx>6M)S zPR%cixnN>7YTOi7ck_X|ombTJ?&;SvG>V0Q@VYqLWkAf0WM20+jV}J^to!`uvd;HB zSKCbMkC0DVVpeG1TE&UXKn6bw=*{5Yx3RQHop!v#u2`cMIYKo5mCa(z7>w1#gP`4^ zR)WU2vzsXF8<-eHN%{T$U|NY71E?jhEr}4o`ku^+%~C5E%{93I)ih2!USd~9k*}}) zOQ;xRI!FP;P@O##Zm?UQv=V1e{+Jr^1_yy78NwNCI#DYpjUCL#A{aS5Eye!1-LBr9 z1V`3^%kE8#Xcl-B&d6Z!_sf_(W1@I{+jYfmw;;|kCu%_hO2%VY-@;C*+lCIKrVU=< z->q(5mm?(`_$I#$Ebgi*eH0bXR~dP3$(hf(V4HLor(%Z>N$Zs45n!CQ&~Tj&WJlg!94@KD(;?;;rSLkX9?ex_@7Qzi0_eDQW2U@@s<6ruZ2 zyt`AU>XAuhI6Gb)`f8CaP4|a@>$rW3Jr;SRCC^t{YP#(uSYETuO 
zS%!q(3{`A!B;zql;XR&V7HPJHbQg(#u0|XuEc6AS(&;s^=thv>f?Jwd&v~6VSDO2# z3+}|`YC8wwXwvQDKKaKg1ldpJv7Pf{hUYlf5C?`jgsMp1-n`d&(ivPM(K|3hl z#gro4qp9FlK~zV6Nx(|#sIxVS9<@^cHB1@D?1{BAhQf-5dBy1JKph6Wg4$eavR+(l zMFWv?+>K0c-*3YxYgHa#rm(N#`4!pehg^*76>xgZmi~+L6LL%00qi&c?#d6I+co5o+!04JhGSel z*l-yC^)mT2v2B=J>m2)vAXI)*wZ7!u_L>fdm-VFE&nfljZ#8Z}p5~c>98tI|bg?@w z)*PYi%Pcm1=5IWigg_a{$Z z=syOd8UyY)*S7|0Snd}iV&6(S6N)*CXBGMBAzw<`0x^uf@_y+j&^f9CJ*OeS)52#?V%wA~`6rLN3u)iND&g91 zDUphijNpy6hrAUgA%&;hxipNnjK>#6!w48d(69TyPE#ngjNh-lwE%gJKYmDU&c?F- zZ)mZ-oee698P?=a7%o^Cyo*E`!QIyZmr#3r07t0!`#*7=DEC0*yrm~a3 z_ivltpVvQ$%P)Y#T+U zk&XGa6(_YR1`MQ*WYH}r`Qk5gIvHT>lzekq%JWUt%@`eo7UpGS?kGwHbM&N*oQHD;`7l-*~%vfb~&`{(5Ux zFk-Tki*Noy4$ZQp)&L&h39`Ggr_}&{{wGv31AN`?dt2#2P8dpOY~g4BRE7l|`M(Di z?0;U$2FL&r5<4hAn9`SAB~gHS7sGdsb)qzbW3W(JOAt+_|_?o%5b zD?83`8rnkij$d4R2ESQr-j%}zD_W%Z08RYgK})09NMRGy=A-~>kSUA>qw`@bf%>%nDJiqEvQ5UZl9rfuC{Fi2d98>`dBf# zPEt=~kepOm{)48}o|sfP+^_G*a;cQiMLz;z3N|?$2TG^w5a;AytcI9q3y3t~Jo^`Q z`owr9Swn?ElUJOpINQzcpvdH2C0^Tf^W*1*%iD*e3u?>)MbO01_N*~P8Be>6rXlV3eZsTkr&raH zu`=YyeCge%3lDVsB#86X<15=T*q$+MMpjC; z;P#9?vz_9bH=1ckT}|Pn*jFw~47dQ>H9>xHSNZ6NQMexLivYJgg(T``qA9QQ7^UG# zWM@xkC=kNt*F$%9qWzdm8lw)4V-hN~BEwig+}SLmn08jfgL7qFY(-?*cRn&+m6frsxau2RZ)s-t8LH^reCn_)IaRXWi*&r}pQ! zNuy7mn{uRozjU-A2<`8dR@`J|a4n90WzK6Q4fsZ;AQksK=wof1X(li9tL0=)bY4t) zRzcG`czAnp727er`Pm8M@tv8+w}v7Yz~dBQJ>6VGv7JP4q_>Nw$I7NnI$o*?*6-2t zC0y1vHka*wC+q>O5?T)wSatO>_^$DkS6poI+1n$xxy9GsiudDb#;jKUTw7d2{0`GS zBtJ~f{ZOBMzyruf)^5x*Q8?E*C8OFDM7!={^=$<%`^y$N_I&aR9cn9^d*?oPY7Gydj6m$K9-#4N!aFVJP~Ya7Gsl zFm7E)G=IUd8t8Jz)C}IOyPzVTC)s~i=&KA{dGGIMs3hlZ z$q&Z#C=8;)qG{_pcG-sJrSHUu49e(T@?cy5x6?GX_KGVs`l~PcF^#~%@4wmd7tgGzSpDD#COQGsS2#ZncH0Xp;^-u&X(ND zE7x$MY|j$ZB~hI$gOg3FlRG9aHz(x63qC}=-OM;h2}b4)I1W^U!|!o6M6Sb>DDHf0 zI0t!{yvg>@edS0Brw7_pG|zr$(Z=$|6YJ8nbuy?iOpDgkAu}hJPd0Y_DJ;g3mGFA8 z2fdLu?+1=dEnYz*lVyDydyg_Zy(?staU`ZIQdVVv;?{!pI}fv2nfy$-m7ASvfR>`| zq8DwkTNd!vbH1(QVc)r{bE@$Xxy4aMeGxF`gT@)fDDsvZ8>X8SP}y03S? 
z4?6DRy5_J)zk4)&`CaJ?AnO5Nj!tgaXngm|kD$c(ce%`RT9(?U9BAktN`J8B>t%be z2;{TJUwqV8O*=wxHPcF;O<^7@jd+5>X=%r&O!KW6J1UgPNeGglC80k}mE?I|aDKEQ z0l&}qd7RPJD8pN-SAMx(O?SW7``P7XuP_Jj*bxq49ld~ap^@T_zQ zUxn8F4Z;cA+}81#7#w-%S-Rwc&B2<S5T4IFyEt9;kOZVMyY0SL z-<)x^eg9%{YF`3Jj$=g2D>4@--uh^k`}*1ET)f?jrX?ka@6i-mZdEaVQ*kr!nsY_^ z!Q&`^1Ak5aOI4>gS8yS?;&q>ko9PPnRQaj7Nf6`aYug5eH}QF%M-6|J3(L>8IoNFn zKE8Pe3+?hoE1yu?2+SZ}#Yk2bQ=U`%i_auzzoReIE*XGF(!=omYt5+@SL$_!44nv; zvgdAjkBn3xw4}Kn|L8?ZVm!Cu(~LHG#ad0dc?)$~6pSJqA|x#xc9%&|Z-DNmQ6b17 zu~oPo-49E#>PU@!i!rj)-b|(!GV2o3JGq_a#Vte(uhYzZ>TE z`BCVt=N86+ZEBe@5qzx=IE$`7ug+{DO4`0SJ+j%Xb>j#bx-1`kP+^Vm>J__CyDv_I zl2SPEs>UCV$=9qek{$AWLnkRiX`Z3i_HbI}JwQS$GtB#XG#gwgur*KD4w z6pbYTEPIC5_ap$n(Z%o?O3Dv7fuU1Gr)GMXc^_Ai0CyH}59)w#Z+B)He%cy~yLV2I66ku!ZwWj`527MTPit4<``yV;dTuigYu zQ(Jk*hu=|x-^E1os!!77KL5>#wd#8BoNeYlub@fQ&b-eP+s6fjYi_&AwNj=$p-bzk zdT!oTnz23K$a0WCfCcc=)Z#BkCL2=@U#Vwc9z*=rBajMIj%+a|X?7> zlg_}MWJ`5Y2mx(mG4y8x>O`!pj_v}PiZ-t}n?oqrX6(5F7{`XIp2vP7WNZk-lM+J_%YH)8twW}aZmpnztTOXHQ-41 zmKHRH$px8BPB8UbB1(9c2b6&_?1PQ~j0{w(9mfU`Y2aro*RkiY2*CmN?z<(mfzEl;*Z@yz_~swaa|rgjuXGcOu+-?l30s=y_uX}KuifQTYAz1(#d6~* zs?8#4{p`wgP2}~LzSb*DtHZ@{9@q)@a0gP=4Jq}n>1CrMUe{LD6^uJj@W(#I4oEm0 z*Rj&>G2ILOxn=#lmE!O;!mJ;R?nCC80Ej3su6&TehKBMl{xwMAt z_*Q0@r;~i&N&QufJ}^lp(xgW%E)Q+3i9 z6`|lBp*V>Ghqp4a=Vr)`XJ*~5W*YQr`T8yInR&wJuQ~+@?3~Rh`ES5s{qR{U>v6|e z@ah5A%Z7JHD|*MglgRGgqa=3NQqVlh&HwO zr>=p+pd}zq6L%9jeKd?79P#|)_}rt(EK{Z{8<1rVYo~Q_(q40fJLxcHYP{La%iOU#4*Obr_fN6<>S4Iv zwSkAQeF%;cJF1|+FN_|4ls>ZI3+7oPLb8atPJY-!$|4?_Pmzt*5m{}IV_)%npg0bG zC?&Wr1!!0`4czMOoA&IkROU36_s^MXo|Ol45Ulmjh%x=^-DT*Q2R4;XG$_sJn8_ZB zSB_vzg$9O$<(Nmn!;P9a2o8ltjL6-d{oI|C*!cf=VivOPUABV4~PL-o!^5Re4v zdcwXwwv04NqhRN%nEQbzz4w$TB>XwW^JQ91IqC>OMx<7!#eZWchMl?LT&8hRbK|-X z?7oNf|Kh8|#u0C|X^sfnOg`VLzwvG2J{uvTeRl9DP-b%KQU>ge7FbP2ElO^s?~c52 zs~Pg)`+L_ys-Ks?-DvMN__OP5320e8xH&Y1{OY^ZX}RB!g7Kv3EHl5(R{oqR_C-S8 zxE-l5mCEJw=jU!paVbkyW_U7e@4`UpY;!7S?hsULe*CN+JIn%|bK~;SFam9Tmvv@P zcZA0GtxlT!oJIQlybn)p5Vw^AO>XSbz#sGRI{qk2mdw}IE4W$hzO>k=^ntm`?S*j_ zQ>kJ)MNy&C+3je%p1AW-L_OI8#hk&WMQ*|RZ;qx)x;4OKcx@oZ6Rm?Fhi;Eqkbjh| zoGrhrwvlKGs3iRr!yt<$w4kelTychneV@o>GWf?py)!pOVgHFV>%f<4B93}Ex>l@M zl4#Z2t(@&|1?R$W%&=R#9llf3O$64n=XMA1;BF~VoDYr%eXHlnC`J-829V{1B?Wcn zKl{rALDD^7oSq5S8O+nQay{}}-F+WVA*^Lr(n40XF%KDOZZd1Sd|mxW!1Z9)1=(k@ zz9JpxK1#D`f1!k@hX#i7bxkjBDPtvV$}^gs{9MH>-bweUo!_Z0@aaK5IG_m*3l&ni zVJ{r?)8l3vC*TD!Di?r~)w`=8C{>rC56kQF{(WzKu6PAGWVq$Z%`V467>u6!#tosw zU2FJua>b-&A@-q?+OSRX@c6a;Q^@HThp~MYYQfW6;N!qQS~H9;=9AV6(yx!4vx*lcOH{C zEt=f-BQ8azu6;gWZ9sT_Ulz>eZAW0$_Nddvws?IDrT#tg5PrL!<>hmJ{NmW7=ccZ% zB&hLBZP~kRt1ITvj;ES`l|u2>@#x^FLTCMkdupiW(2>hmDoTvClk3Yz(}L5Tp2;X0 zm5pYF`V*uEVSIvgeu``Ex_K%oGqq;w)cbztH~zGlc-c#o+{L&LQ^Ip;Iyx$~o6}Ef zwL42zk`BKrsD9O|4?V7SkB~dR=t2T7CFkwunzt|^(Z2g38HJh3BdPun*QEVh$YKH_ zv1h4^j1&)0t#dW^TMgYpnD)}@ZA9~(XI^}ZirhaEF3y_`B&94ila*v9LPkGGDB;?a z$oVr`ekCi+`>SvF+#8COT%%O8Ry8t z*+N9qTC)ZdnwJ*RYx*hD1}3J75I;VY69)VE!`?1inkIwgI&T7OkzM)tNPTHWih%mY zBiS7i;EwN{FL<7H22!=E-0tN^#a|u5_4}#(nrXR}pl#!@>x_d!aqIjG){mGRIS4((i4F}$WF%py=k&7sTRw@EB$^_hNEMOj=qI2d|}KU!Njt1Yhc_TMNt^WhoZ#LkeFbc0L}-HxhX)-;IUbN*pmd z)Y!rwZF%+HSG}&f@4Yy1t$oy^K4`-kx06Wb#WB_q_t{6Dqbas*=XKWZFkb@yN6P}Q zdF1SmNOvyzEr0NfZ`rL;_gvT>|5M-;iLb&d`DYSP-B{E7`gLb+p9Q9LgwcB4v!>@n!h78Xt zSPu=~Pd&BQBmPc4JBub&fL#>Bh&P=Go=zHtA(43EqXsiOdf_aa&s!XDn&1j5Q4~KB zj6Hp619YwjObf11&pVz%JP!lV;Y+R2v4LTUli z%$Dd|w`(ajeqDK21D6HpChj^z2ET=0E>k|?X%Vr4u*ubKuEP=UIBv(f}EJe%@7s zQU0ecF+JM@P|q9kFn!CcXFQx*us>s8zR^SVOwL%tN`xKSea0anCX8xR=QH4~cL?Z* zV58t2ReVs7nC1NlBEPS|1;8T8-n_!{eGuv-eURn^k$P7P~Azx1bE@O z$!E&cHo-pgjKDPL=||~@Gf10>Ti202pTgu32mLoy9}FQP@L?G3mP!>LzKzjh<8G~1 
znBmV2{=!}IT0jf9CEKRVHYlAmr!W^?AycEqY`lV}s+q7%+{-$gv!38Ym(3Fbhd=@w zVG|xwHoj7c8;Hvqh7IY(Nx!Q(`NrpLKbM5?h@JBh&|4)%E)8NG`>u+FzJwf)4?voP|m z%(U8>z-UtRpvh}5GWGX|R_+kPKM}Rx61a9l51^hFMB-G_OOQapg=2c4`xX@?{0PpS ziom_bTDMIT2hgKYA0_D0^@N{gZo%F|{3$m-NM65GnI#9n4Nq5pEE$xETW4HYTTG`{ zmqP2##nz^&S6JE&<*MUDlDXcdk!ce&IrC2z(+m*pm8YsH!>A9J8hv4D7AiF66y>OU z3i_L#=@$kk^EFP_ZbTV&cm@pna{Sb)S5rQ~XDC7BCy19W7h!pR=hr{Bp;)S5C-rA( zJijgxbSNC6OjKq$E(p7((z+=bV5V7<0q+!C+(u>E6sd7MVL#Lsl%is-mWC7Jc z*r5F|RNIHI3LL0U4iG6}vlQ)#A!3h`u*2i13^ssnD$fy!pCM9} zoQB)ayMMVTK*M2|>Og~)4x=aKjo1@|?j3|G9qP~15E1;rw7w#EGUTi4GvbwToPp;T zXNn};bmZsT_bFzOB}yddlk_s09jA}Zx23Oq*$Pm*N0#rV83LZLvE#mwUAbNW2m%pk z6)6WNx>{~5w%VwJhtD(kK3$6@b}-O>d|F}ZW4tygs=qh4u2@z4CtgeLDO0J*9h4&G zPw|Sx+sS&Y<$Nlv`|Qv?o4SNOYx?35EhPL(KpynrDnBk1I=v{VB(*-Mrp=DD!>97l zqVYuW{>GjA3C-~t@Z>@8UaR5DrK4a;NCjQVf|Tu+3c92q=d7L$6*+WSxq;OtV1ZR3 zYrXwiXgTKQ=}nRI44%LtniBg621+>%viySBX|WjggxZN8`41kp{jjyBWZNG9>neWx9k(QfejGVH$`6ZibId|3`h zq{R$}K$8i>$gOGOQ!|K9pNNV=3*ERqi4o3)KQ5r`ptl5>SjsNGp@v6PPs}_X5&Gi* zXh%=?T}$P7mQ`oElhpTxQ{*u)qqOxTDs!S94khJ_CD-aGTKky(BpP3*w*g_i{4HMT z7Qpg)Nj$x$mG&JacG2@#tyq2_G4Urm%`8bJ2Ok%<4$Q!V*u+x1!H(Gt< z={9t+C&9gO?H7pSeY6Z%tCwi4KCTe7Ntyj`L(#d z<9+h5)Pry_y^|L5V?qh7`NMXm?Q(;^vjAJ~nbdSn7rdYVx2Tfbw*Y*WcOtbsjE6*8 zTBhIjP%sk1m>x%9H-4EMce17&Mv1P@u1$&2qISDBh_~Ka1CFu%lw)m*oQ_}ARmhrd z^>0br_zvc1Nt0YYbuQ)=)?e|L^F3vTh=S&sU{ZghkXF-b=DwAoD<(8h9ul|nsVho< zy%*ky^q8>R!{ zX_*7ex~Z}{FL)yk1$Ga&-+?ktDnS{_)zxH0War{(yDyW2R3CAMXuBfhpD9V2U^~z%omHIKn=c!-eJC@V2SBLnE|BTO%$?nRGrM7C&CGRAxnW3>GUcP z42OKR*%Cm#T2TW*gNpnnEH|-$oxDY$-Mw6x0dApd_Q;Vbaa^BgEp0A{UQk>Eda69? zE1|vo<4*JnMq%0n(_z&3f9-u$R9r#VB~9ZV+zAq#;Oa?%>sB4v=j>Ck4-*Xc-}M_Lv*DV7XTj4i#Sl~lS7v6Kp!sR5`#E({6im6qq80Orl8tU2jTfziSYY*Ch$<`rv)#2cK znwnrU@nab*;g6sxM6dcTY(( zHA2*B=${VbTZGx#z7jbto+U4f=Hb3JHFGVrydBdqsKq&m{G+mc?e&(Fv7n70xV|_`!Y`z@i@Nb4kJMn zLG;`(*z{e~iZC0T-~$`iEH`zt%Z1hV69SJcrGM9fQ6h>hrLL;*9Am1OJ_}*6n|t38Z}Wp;bUS7-77PhcGg=%mw!hkg>8e*xNArD0kp+R2!c^0YqcQ!^e=s1^(Etgt0K0@H{KIG+id#8DQjO~(mA%Y zUzstQnJHZUN}}hXQeqlMM1;~RgkdlSp1fY<{SGUI!_3ezQiP8&ya?9lyIS-~$7rvC z4Wi~@&l$dMe+;%az?=kU3ym?hepSVyT-b-R>CQO@0Lu7Bv5Po#Sn|jhzcS?QJCS7- z^IE>g&8p3$lI^Pq;NUxOZsVOsAwmd9oXzGRUP!qPP;T6iu-U+qMat&GBV>;74rb{U^zyqt1 zevSmSW7dP&-O@kmq-G=ca7hN?PRcMii3XQbg(?t zLWF5T`s#+#5sXOOr^}GRTJYK$>H@TufDT@fKQg(_T^1HrWB}p8b_n~o+t~ij{f-)4v$qEz zyup6LlbhxtkQ*{cu)}3D&-=OupI|uf`}h2TmcglklE5jv+<2SfFW-_VEKAe>?9stDgAgpTR9UtVLAxHCOASsw{CZ0NcCL$IL5Cp=ust3=mO zv%8<5o>0u8OG1O+Bq3jV;?tJRr@~V?V*gxmE8P$6a-9PxBg8*$EhT5_z)A18y-@Rs zQ&|Z+88mLc^@}?*?#!-sg)_y*dSaO}qlGxYn?DhM@Niq2JurWKRmv1spaS%va`%h zRI!w%L_Es}&8HRoH>?O5md!5ywnVN)f*2rj;zpSF%##=f$eumtyl;*fPx*mUa;Mo4 zLyVeW6$gw2<%s>9r08#scPL%}CwdoDnxPxZJBeg}H`Qw}|Hvdil3V)kIsLBxDoSlr zl#@QI#FEIi@mhBxf7>L>4Az7-k`t4}FAIJ91D@p@5TE?Gi6Dj|L~i5!#*&b1@xg#T zmJNY***w6pi4B36%c6`~W(RiF5Cd}w$qmxrXVu$^5tM+eiB6<)WIo97D{hzZ|shV&OuGrwMaia4vTR)&mNq z4V{*3FqlmSLdBOj+Yja0W_lpJZi2DzrvauCGx444$5_ZU?EvW{i9* zM|4V_{904$uQsib+D!{30@Cdvvax8E zCNYoEm!Y(S!-YvLh1cT~3^z5aroxb(?_iz@cpZ~r74U6N>;xhWQb>FROnN7ELvUdB z;v&rcFJvTQ>!2m1ZjkOl67} z%;@O}wk-;}qL=nZL+h=0s9+mi5t{c*y0A!tM_C#sWs z7VuFm1+l$sBEAF<2X|8!gu??55qSBUz{`PQM?HnJ#Sub8MWi(%qVsATC$u2Uc!Ilk z?BHhHn891S38tqpD`qzVAC+tF=FNq3orZfCVow0GvTdhW3&JP?5Mkn|m!fK>iU0b8 zO2>o|%X`44TIpC{1ngG}!4Q)@Uv;%r^WQ4|lCroGzO8}fE;T1Jvb`#=_Zp#9pgR>${TO=F>D%o`! 
zdmpT_e)TiOn5@1?N)`@>l$bfG`J3qej>~O06Q7Juwpn8PTa?y+ zPRKU$5W1W%@YSH;fP5Deu3MuPhJFGuw~ecAFnD$3(!M@xBqhcbVEHlio{tTJ=oXlP ziMIUi=$=+rWjh$tXDTgQWOm%3|AndkY`5D6e73}JEi$qCc`xC4u{2m6H6$D*22KXT z9_zJrzVnxa*X_XsQ`ixXcD?x;w2mmC@Pa3R?oLTOCy$s#O1M_?l$nXm<-7A6)g*;> zU}JY_u%?YfpXu1d%K7 z@}sbxs0}>Zf=@l9zcaVfV6b2cu>Udf5j{%sZ!l4Ffgj1W?O!9-Ng$KuB}sv^99%v& z&Nm1I@L%Jo2y>}&%Y0Sm(cxliAis98jPG%CDsnfcm&dHc4@d-}n1E=lQ82B8g#+|% zH3lbO@j3`KUYH;j%egP?VPv)^Dh+7KukBfg;K}x;iI-<+;jq@wggNlZofz8uuoAs? zi6aKUT-_Uh;^-?U-wRY&(3mibmta$sS2JupK`JbE>BM}wjezx{*dy#hYtxXlJYiqzsWAu z(1uBQk0*-oYmHI~-tqwFl%O&ncmbwSi|?OH_@!!0hP|omhzn~o0#Kt;{%I85EPvlj zrKKI7G)T|xk&j%1Itn2;FawLwycwWcixoIU62g!WWRftlImGdEHR#}gam65K(tx>9;Z}h};ykT+ZTPv8@wwHAfj6fdZTJ+p1 zr1;F97D+y0^U++uVh0D5AJ%hRz>-$b&Pk^@+Q1QG+24VJ&J;o>Tr*yA5tX*jS<%YO zCmNE#J$x74m0CBa3nodqZB@2}t`XVz_TUI%p1i3U&PHI-6@gW|?Iz&c82zBAJ@KWR zBQsifJcx3-=)xx7tJ*Em0T!vW2#zC+^O{b`KI=RtYO?j3 zgi)nfeU?oZG0|ykDdc~?K-=jRCj2RZDMUc_+K$jp-xEzHVive@UtD!oUQ+_=VWCM( z4!kf)?CY|z8S?KU7wWxv=GtWP1cxZy7CGgcutQyd5)pN!EMm5#Ruh(uV|I6 zEp9nZi~~$E;s@WZ#~k)vZ1@~w5nmdJA<37{Is5!w9{#+E-c7m?l$~&(L>1!eWh^Lr z605tK?`Gr6SgW!Kjh3 zmr1_%>i#$Ah4QEomTc#|%=TLY#h&YO@W%omR)xUAXdgougc3c#`4t4|Z3S>y>jKQvD(bHq*Xe6q-4W-`k2Ga_zTe0T}mwuCYMFr=2tX8{K$O2;tMe1yu3 zDMseKZG?R(pFk;hNBK1a6sPHjVB=()@yql%1|BkqjP!5b(M7xRY<)-+!L}yf=SheR zl@LW%Jo#CZH*&kS3tk2N%}?C+$I$lJ?NV^Ea>!u?S6BiZYgJcNfZ2{dX9+TAo<5X$ zzf@{W@p-Ji(E6`~PD2dQo4oP)oc?WO{0+z|?k;&b1iwQz6HP3EO*y^fFGAYs5?<*w zDkmD&+4$2NJ@BqQns~l_`VxJiWai74eS<3iP2{#^i4nHLuLYWd@(_~CatOD*Hs0|6 zDs&3k{LD-(MQRh55$t1ykKUoWBkYY)JCZ+OT7aIO-;IhfpS3uYMVw#lgF>ybb3awQ zgODY)ImeTyx7+INHPEMgD9t|j0sNT^w=qU&g-zoX`hIjfl;`i)1JqmToM||}>N9OJ ztTJ)y%0*1%n=heFO79gPtBIdmEJO?;mW=q7`bvJr?brn?TI=}Du^%qxl)Tko0H2~) zQTGiDb31Z^q?vD+wE1CvY!(VbdSR86;Rn^Co+%tz3*K>*2( z+BWWVQ8d{PzY5(A3~GM(Fauid;7L3`q+(k?MDjqGP%!)p#O&oJUbCSujMg~EEbB35 z(#Rh$3OoB4vdQ&^jsv1_nl&YVBA#u089wdmij_Dg@L2x&Yy= zf5-&{6#szZ$qLWxik+hQn8sUduZGyiM{{yzfNo^rAvl#yBnSVP|5c;y7*~`sKb)mX z^E%YTuU8*${mH=ld~T!}3Itl~xp8 zQX47OmJ;#nRr-6fv}wv~M}`dH(X`ps3hKL_3Q_s`%A#sby*CsvxccajtG%jaj(lPR zOAxt8)@CDrORq)kMn+B@)+E?&!)R}nVExFx2C&zOIXnFDv4>vv52XRwuT0YOf2Ug7 zvLdcKZdC*5ZUW@tj@%lX)%z&#IUYJqKrpG{AkBe?P);}YQf`eUjfha-e4B0ZaH+QH zh}I_IT*W6?VMXRsEc0yW`|N(22qwRzzqP(mkL!O*C@QfaKbCz%2sjO6&Qfx zhPCHT^^F#>3RlibKVA4AIlEDqSCb3ybO&L%ZAf9@_;@%9Jd$$i_@<=q86%zgo_{=( zoLzu`uX<9(Q+zc;Y5V-b8)YUDM^YP71*BJ|`pkpszL(6WsJ`g8gcOu}8bM0d@?`thg-5Znc$`UB zSvrm+j|c41Iq>3B3`lYWV(lFoVX0E&IQ3+1oEwh&zv$yiPp#7^Ud;jfg)vU>}+gHi~uR`*or zvw76v#CvdR#r`&74%K7n1iKrcM;KwYby#3*Yj1W?y!XmV!^ie1&wz9WdiAqnaP1@^ zTDFdv7aW{g;r)Z(Q}b|oIRG1<6O{*5Dm&3k$`=(ZG1_e8H6ruw-eo&qsmMI#*vrBt zV)lt9DFVk=jzva$bUNt+<0Rf_`;W>mK2er=dlb_Qfh;_CFwpymTzdg?y^>q*!Ivws zm?xxyA<%*2=rWRrTgox51Sk5 zHM&lLR=IA`f5ZtbB6E?Nx-TM)Oy%Jli`sE?1P^d1g%IelZML+#pt2NUD(&P|e-RX2 zgroa;I#A9-KZbvpLSv<_6t-g)478NvhIYS=zB8hF{+EbTLuki}Rzi471%w8+s zRyZA%I_EQYiKYvYcS+&$a7YY(B)v}i^4vrkG4oYTC#@pN--r*-B{>25FAeg&aR`a$6>SJXCtm9ea|+Ep06D)S4BVpGW9}WG%yJW2c9R7j^CeZYH1d zu71T$lC{77I7ZkhDS&7ir}f(I_)@=g7K}|^`d;==|K79=w+$t+2VC;R$vL0tX*i#T zjdI0+HFO=z$$7SWA{x-_(~mu$3Mtx{>_sQoxc6vyN)KEsjKC)EAU{~k$&+;x`>Yz)sq!N( zK#FvzBOXmPAf1=w{hrsqc;zUMNXp?7sI3R0BprwgcV>#sZ)i;KvW`zDd>P6l_a5CV zQjKn<{=NI(xd6=Gg!xcW4-_KdBV5q%M zFTrnZ=anSzNbex#fYNBq-1|uHI`@4yxC4YOo5;B@ahL#P79L0C79+&p63 zhAgD)HR91JJ$8;`gXv!<49{Nlj`yk1ls@&ONuq&dmTj5ZMv8s@W3~g;P@{2V?^-`| zn$0>R5H82yuc|r4^AU{@R9Qv%wlDEDC7c#fgF7>6+^`}_D@0uZ2V_dW?8z+tXfUeD z%Q{j?1^ZG5SJAKZh*hX1>>$rfAK#4he>0uF{_<<_XZJo$0z1Do5t3QVZ}BPO?r!+B z^b~8MIN$SjBoozMEch^+?sCy}IB8IBF&J7JnD9-O@$w@MCInM%S#BD8OpgT`)&Y6* zetDphXTel&tC-ubH1nl@`qtd36kUyUQ2gu*E<%VR-6u=9rOmEC_ux;a99g@HF6AQXDiycp 
zDhK%X0IR5pw+%BZQSu?$s!T#8mQ+v!Y1Xc7qWR%on?G&UI_9CHf_qZMA5^T5BHHFF z)}uGFkT&I;7v(sYj2wQzNbSS;zE!K!?tMqIurpQllyiHh<+9PV|A#}?bPUqa!6Zb9 zz$v8wQyVS)sdnitFSi`9apl7VLEIK(es^eWZM~`Lc^cBIR=6K}vb8~)nOm*A`!H4X zH9ns?gtC<5O`@|?j26tQD?i3F1ofJu$s^2%jJhYqH>d~Ek01x@>Vr2#sxwhG1@E{u zu`iR`3{p}x1-E5+myL16-aLrRW>t#di!S+{BiBUdt?5m7KTsb>w`yR^Ohj^OTM3 zLBXCHZ!L2k|Ga3Lht5w{U=aDt6}Yu6XegZYgZoMo@>f;U! z>k#d-S-*Fk4|+$Nna~2*xn`d-yl6Pa@?^*{7B*&#WflPucE3nz8J^4$M}DvRs9`|c3Rz;lD~l~-}x*pha)IAydc4>-1YTv^LsqM zMk~){o#L)-pA?6#i+M=r#~=Qoe2 z!Xu`b=ke)T@#jl29rz2T3h2|!>>cp`q9Z$j>8Me@P6hsLk5i^2>?(%!z{rCzRkBh+ zwdnncR83MkJE*5BE;`E#+Xhw;t_)#JgW;YvBIZB4YWC3DiV%zh;MkZMe^Ew^dNgDm z5MY)MrK-B%X~aVQnk?fJX!wL<_m-JWpg}-{R(llBK#3{rswh2zRcY>P25o7muNQ%2 z>|y6?_pD!EY#0}HsKW4{DGhOju48vr_T`IRnt!=gc(Jh-H|ioBA)f=Me&C;BlpS{n5RSel4(M$D3xA>%bBsHwpz zz=~7H--Jv_kHr=1ciIWV{z}dZ9g=O*WDPc}p(M1cs8FnvP$^9DNS5t4>7pIgDHhjc z3^{sH!qyMH-}j$I+;JHc+Aw)c3u2zS4jrRO&TNxbuXRTqUlr3AVdC?PHM$q@3~&Bm--iS>uqeCF!rstSWnoLo7_HWJ<~bgmlDxz^DNk+Lt& z-whQ45wB!f)_haGJL2XNY^#DvNb7>B<>>~k5nE4#&{zcx7EV-wY9ehz4^5)i*k#gs zt-LmR-p8-|X)FDNNmQ8aVrt}s zvbr>7ZFW5**r(gN)4O3fSoyGK)4x5{*Sboci;pV&mi`epPs%{>3Xai6x#w7Ju{?!h zOt!|_Fjd_Z=wk}QOe2Yf?J)_b@RB0BtKn%%xqBpDXzz(* zMGg%BK^Yz#urI*TQ>an!%BLt*Kk_9Rl(|yP+?b=H-)|Qo7H)h4 z#Ziz{;!VNiXh3R}z@l6ONs`m7yP~JBGb{Au%4wF2?BvF7W(2Q*k2HpQ^(-&&D}U9a zQim@JkTBL$?C-VG^FV!nNao;|XTR`IpXiUk4nd!3Cu`b^FWnzM-3<;Hf>BdYOk=L~ zoL_)>xItgu)9q-#lMr&bS1;;}Y!V#Xv1W~)b{g7Ox9n8U1tuf;$frVU$Fve=M+3@Y zbU&Q7#!8>R*MS6M|1F1NRdz-n33Us5dI`A1U7m57tnVoqF3u#9D}PpgzVX55Rfa)D z4DPzWrEZ<}R6>6ZYGhATPnYQ_hRa&NEwc}-ZCi?c3HHD=w4+pLUL0}Cy?7!Qv>|JY z-6EZ%czg~fw#m3iOjxaDgLdW&> z?7sCs`!m!C#4qX0R_2L+6~JG1lLCMSQN%2oM?|>or~PIZat~?wn<9(5+|zi zS|4E6ml_xc()PExOwgik6%&%?j$@hkobPvKb3T1LG6h@BOg`3ENC=-keI9+1j%qYP z&POTXdYY1LU1CaQ=~Zlz6hC>r9C1NOo8OtMWjFt_I2_94z>Iw3k;7{xvM?oyCOmzQ zRPa$?OkFUm;nW($Wj7G((kE`mqPtWApTQ@%mj&lM99D088;_rQI*ZfCri?dSmtgYc| zg|XbUn?LG_=H`?OTL@zUu9XJtQ>6%fkeikDZ`HlHmKn>Wp7`pY;EDu|z=FEeHv|B29jAo=!~il z`g~KTo=7|YFwXd!bRf|@C;B@g+OJgxgM-cfLfc;`{FJXLv;Jmi?6 zw)yw(>G8(-Q+|6;ZIp6qK@y^yx>Jx#dc4=_r}`S1Rcct#wr@WCY>&aDp)oO{?;|w| zW+D7ff7cMzrJa#z5+laSrxVG%Uv(9p!n+ljwfNNJcY}Y>rw*Kr@!AQVQteHP?J0SzH0@4}Ot<%9ORHkWQ=ZZg9&lSH8 zuoI`0{&B*sqO6cN3M&jNLH_TqmD9^#UF~*E_y+VNi0l%{XGVPA9zj-_9li7y!{KOj zJDXNtvXs8=bD_fSQOcmT*|v=m@0m)=rUdYtwj9^{zQlu{7yHdVXNizVpe-^6g9OG3 zak9L?AB?8f4+cFA_AR}_$hH2nuiOVQf!@J7_#vbX6ow-i1|Oq)^ic(*Aur7{{N7^Y zJRMn!D?jC@f`5RGH5B*y8LgMrv}gXIuJBXz?}d=T<|=yb)yKOl%?f@KRY*SdS15F% zIK^}~P058Hg=d+C@~!OtG_}*k<%z_qvI-+eF+ACte%lf^A)BMFO^$+FZxBQWKaMOh z-^B`pgI$fGi&F+z8kGS3ArRsc)?NJgs@zlAb8d!)w3(>&5*nMX?@C9ZZ-QtA_inh+ zfn0|bt&=PyHZlKsHbcyo|g=d#hifaxMb#{bg~|vglIn% z|K`t~M>oky9i&;8Ou zl-IOg9))v$V)(I-9Jb>s0ko?0@U!=rozIl{M)wU=0O-6g@bbvg<$9XwF2DiS{h9bM z4`S7TdU@bFy@rTUnX2wj@YvPf2Dp7jDj1NHz6UG7O5`49e`jy)i}=muGWl?-z4TYG zoJaNB)uTGpAR;oF=v3qd6o0jZiUW^rhwWEpqCkQ;%;s;jS7j?Ja|=M&ju=p$1$Dt{;^1$^0j{9sD*nS7ye z3K2Dh>_S%8Qj5c99I)9Ir*`#WEGEvb~{m4l3Q8&A0(85G8$k((Hl+uQN@Y*Q0K@}RD? zy)R_l5aim4D&I}*b9KKgL$*Zol3(;Hrr@NNXi z*yuxF60<D-ZMEs8HnvK~2(snWcOOG{v1I8c5iKTUpY@b8ypFYd1*-GGj^l%8drY zJ*^GknH&he7*hX_l#`~%=9?@M@R}P1SInX#WIs4#+mP2JuOe-(!+ijmnlwi+>U6*! 
zm31e&JzuNxQ~pnVWD@$)J9T*6gI+}_I0GDsG@k289X^5QrREP&?Oy!H?8_uE^Dxg;WA_VS|>&1fex%H{Q6H z!p(=k6e^-pFr^1#5cR^J&=i$Ym-V_HwRsJ}I1=rW4KvZGegIw+jGf%rRaOC}S>eOt zAhc`Qm};pKNN01ysS`Jo@0UYGsg zO{EQqp-B{PXYzbPDK!u|E2@%_`3SlBd}3p6DrkS_u3Xx@Gi?)hC+3N{a~Q`ueij%)Xl@HorKsTu3s{%I%NjNv`_W)&S2a_2=$QkJb=wA`KXtXWu zL<=KVU!t^03tV+`%cG$}9>KiFg#-vK3L6076|*nOI;b151#M!1g7_Z;nG8gG9=0r* zb2*9NxVyK*-jyb_E?i0+5*YJHHUID|IhKd8r%dI|D~we6s#o4mv^oBrp_UgTJOn#V z*)(qTo+@mWFQW5Qd7qSPxP=J|Ry|{RL;)t_FtsY(f_s$}1b|XZ4cMhCP`%7hmNO5b zxdL;M25v}XJDhwniB}@$swl;gIx2$h8l_SdwjAs-yHj6l?Z3rPxDdr5fAl_N&z9Pk zJ~mvVXCtJg^q9B;odZXc17*77Jj%Y_V-QWD6MB)CW5V)Pgpn4r}CSM zR#Ay(8uL4-;ABm5%jrMI>Y?QD=_r+1h5ke_#uOv@=ew2Y5D6HRs-xdK_zb=cOB*#r z!Z|h0Z95hv|E3Z|xD0F+#W4YOzhC0I_1s2YZX-o1Ne{$m{UL_>K&f?#i@M5uhFG>Y zpsAF1i`Z00_6>ba9k8a;Sp2@M;Fp-=bfzeQ{Ifcfwz^5I%c#i5!bv3TP0X~DwAJvRFaW!PXb?l097(>CTsPiGK6!a`B#D&XZJ-+PIT2pp z@Tb}J2E9=hhWhewH&rbSyrs7_+5XM^Kx9)Ju4I_gh4Db{VcGr0hIgIQp9Nskp5m-L+*op5)eeE!BV&&N6RB#uBz01EGd$*Y_ zbi6cXMzwQTw%W@qcOg)0*+-SI(2FyphPX=G@H+oa7V40S?NqJ z)KrX*bW2f%r=U!|po86&S{Ny9e~)T?pZ!oXF=QK6xN^x!W)KuiF&7&3DiN=)PiZ;T%UMD7QSvIB_(W_n=pRdP>0pL0$MuXdR`O4dMukJ-&+pvY+_ zFkHnqp=H&#(3mSPFzQ|rv_qGJjI~B~DW6@|E-bJZ#HRlsF8sdifOyVMJP2ic-;de@ z#VI0bkKLn6MS%LmuUCKXlX0?V;%nlq7~q8n{e>zTF~GMG2@*nvDkb_UHMr4$LF#^M zRMZdf*KNPF%>H7y2-k1e%km35M`%>!hBi0)uUEWE$CY=g98e|Bbt+HLJQ&_rj&dd}(YnJ6UC^*j;_4m`&1GJ-eE7&P)2Y z$-&&@+?ghV=%R>n_sAUA%XUq;AJM6}_s?;9Rg4ng{gf-6>8l(`uxrZ)rA}A#(|4`s1+25|y1+A zEXh|8O1zj8pfFKNiHIe)jzWk1;?vO17)*QhiG!>$>3B%OT!rpXlK%)t@f!bK^sHL}92)oO8OwLoD% z+|N9@brU>mB@~tbJs9O~K(x_=AIgZ}Bn=^a_^3G&MLy1lBYS#3e5Lt?S7~G#2V`62 zgMjqN@4H+9H{D%6Zo@009X~tG?~3=!w4e`2hjP1_?zv-hnz~9EXakwR=8iBcy6^X}U*F1XW$K{|qo8$$O#BO~NqOP1#SX2=m?Ar+5 zeI6-GCyS0RV{UueV3(*Jj^sJwoYGPpNg%V%kl8a&A6fRAQ0AR?V8-sv1s3Ucdgb@~ z$gKdTRr)9qq^qq+k~UcO{R~MagN;g^!fL}*DXumMZ7L?FMbg@n^3*bmj-ec2qS5U> zWSe}IQb%uQq%3XD4W8{{@R+q3pW19|h3fbx>#CgL1W|Wb8Hds)(z=`gcO(yt((Th8 z%n0cCZY@iN`a;g*iMGT_A1MX|bSZX@rDg zjf#+hXYq0-(SpnazlQxH!FpKaxbt5_DD zD<2e|mnk3=xG=!MMB%nu;P_p~aQ_x=KmP|?$W__3LqT|6i@R zgMS!Jc~HCzvnRAHc`zMiUvb0D9(ezWlIcl+c>;?%=|A23Zvy`r5pKX2Ft63G|Id&A zxyZ&)A^fi!@b;u!k&Y?Im6?yJe0F`k%iEe&Iz~h7bE+-x2^m2>)j^E6{%* x?f-ZAzs^|p$A7B`2nZ%01uS97eGWSWBn{11m~!=C^E literal 0 HcmV?d00001 diff --git a/src/crewai_tools/tools/linkup/linkup_search_tool.py b/src/crewai_tools/tools/linkup/linkup_search_tool.py new file mode 100644 index 000000000..8ddb81527 --- /dev/null +++ b/src/crewai_tools/tools/linkup/linkup_search_tool.py @@ -0,0 +1,36 @@ +from linkup import LinkupClient +from pydantic import PrivateAttr + +class LinkupSearchTool: + name: str = "Linkup Search Tool" + description: str = "Performs an API call to Linkup to retrieve contextual information." + _client: LinkupClient = PrivateAttr() + + def __init__(self, api_key: str): + """ + Initialize the tool with an API key. + """ + self._client = LinkupClient(api_key=api_key) + + def _run(self, query: str, depth: str = "standard", output_type: str = "searchResults") -> dict: + """ + Executes a search using the Linkup API. + + :param query: The query to search for. + :param depth: Search depth (default is "standard"). + :param output_type: Desired result type (default is "searchResults"). + :return: A dictionary containing the results or an error message. 
+ """ + try: + response = self._client.search( + query=query, + depth=depth, + output_type=output_type + ) + results = [ + {"name": result.name, "url": result.url, "content": result.content} + for result in response.results + ] + return {"success": True, "results": results} + except Exception as e: + return {"success": False, "error": str(e)} From 5e00b74cd493f2fd43e4cdaed49f4797a162734f Mon Sep 17 00:00:00 2001 From: Pedro Pereira Date: Thu, 19 Dec 2024 17:38:30 +0000 Subject: [PATCH 47/69] chore: update readme --- src/crewai_tools/tools/selenium_scraping_tool/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/crewai_tools/tools/selenium_scraping_tool/README.md b/src/crewai_tools/tools/selenium_scraping_tool/README.md index 631fcfe0e..e2ddefba1 100644 --- a/src/crewai_tools/tools/selenium_scraping_tool/README.md +++ b/src/crewai_tools/tools/selenium_scraping_tool/README.md @@ -31,3 +31,4 @@ tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.mai - `css_element`: Mandatory. The CSS selector for a specific element to scrape from the website. - `cookie`: Optional. A dictionary containing cookie information. This parameter allows the tool to simulate a session with cookie information, providing access to content that may be restricted to logged-in users. - `wait_time`: Optional. The number of seconds the tool waits after loading the website and after setting a cookie, before scraping the content. This allows for dynamic content to load properly. +- `return_html`: Optional. If True, the tool returns HTML content. If False, the tool returns text content. From f11756387d25807434d82327fce448f8a5b84d58 Mon Sep 17 00:00:00 2001 From: Pedro Pereira Date: Thu, 19 Dec 2024 21:06:51 +0000 Subject: [PATCH 48/69] chore: add tests for SeleniumScrapingTool --- tests/tools/selenium_scraping_tool_test.py | 93 ++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 tests/tools/selenium_scraping_tool_test.py diff --git a/tests/tools/selenium_scraping_tool_test.py b/tests/tools/selenium_scraping_tool_test.py new file mode 100644 index 000000000..271047449 --- /dev/null +++ b/tests/tools/selenium_scraping_tool_test.py @@ -0,0 +1,93 @@ +from unittest.mock import MagicMock, patch + +from bs4 import BeautifulSoup + +from crewai_tools.tools.selenium_scraping_tool.selenium_scraping_tool import ( + SeleniumScrapingTool, +) + + +def mock_driver_with_html(html_content): + driver = MagicMock() + mock_element = MagicMock() + mock_element.get_attribute.return_value = html_content + bs = BeautifulSoup(html_content, "html.parser") + mock_element.text = bs.get_text() + + driver.find_elements.return_value = [mock_element] + driver.find_element.return_value = mock_element + + return driver + + +def initialize_tool_with(mock_driver): + tool = SeleniumScrapingTool() + tool.driver = MagicMock(return_value=mock_driver) + + return tool + + +def test_tool_initialization(): + tool = SeleniumScrapingTool() + + assert tool.website_url is None + assert tool.css_element is None + assert tool.cookie is None + assert tool.wait_time == 3 + assert tool.return_html is False + + +@patch("selenium.webdriver.Chrome") +def test_scrape_without_css_selector(_mocked_chrome_driver): + html_content = "
<div>test content</div>"
+    mock_driver = mock_driver_with_html(html_content)
+    tool = initialize_tool_with(mock_driver)
+
+    result = tool._run(website_url="https://example.com")
+
+    assert "test content" in result
+    mock_driver.get.assert_called_once_with("https://example.com")
+    mock_driver.find_element.assert_called_with("tag name", "body")
+    mock_driver.close.assert_called_once()
+
+
+@patch("selenium.webdriver.Chrome")
+def test_scrape_with_css_selector(_mocked_chrome_driver):
+    html_content = "<div>test content</div><div class='test'>test content in a specific div</div>"
+    mock_driver = mock_driver_with_html(html_content)
+    tool = initialize_tool_with(mock_driver)
+
+    result = tool._run(website_url="https://example.com", css_element="div.test")
+
+    assert "test content in a specific div" in result
+    mock_driver.get.assert_called_once_with("https://example.com")
+    mock_driver.find_elements.assert_called_with("css selector", "div.test")
+    mock_driver.close.assert_called_once()
+
+
+@patch("selenium.webdriver.Chrome")
+def test_scrape_with_return_html_true(_mocked_chrome_driver):
+    html_content = "<div>HTML content</div>"
+    mock_driver = mock_driver_with_html(html_content)
+    tool = initialize_tool_with(mock_driver)
+
+    result = tool._run(website_url="https://example.com", return_html=True)
+
+    assert html_content in result
+    mock_driver.get.assert_called_once_with("https://example.com")
+    mock_driver.find_element.assert_called_with("tag name", "body")
+    mock_driver.close.assert_called_once()
+
+
+@patch("selenium.webdriver.Chrome")
+def test_scrape_with_return_html_false(_mocked_chrome_driver):
+    html_content = "<div>HTML content</div>
" + mock_driver = mock_driver_with_html(html_content) + tool = initialize_tool_with(mock_driver) + + result = tool._run(website_url="https://example.com", return_html=False) + + assert "HTML content" in result + mock_driver.get.assert_called_once_with("https://example.com") + mock_driver.find_element.assert_called_with("tag name", "body") + mock_driver.close.assert_called_once() From bb19f1c74c141fbef2d74dc32c61391fe84da412 Mon Sep 17 00:00:00 2001 From: Tom Mahler Date: Tue, 24 Dec 2024 12:12:18 +0200 Subject: [PATCH 49/69] using command list instead of string to avoid parsing issues --- .../tools/code_interpreter_tool/code_interpreter_tool.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index 61c180fe3..9588ace1e 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -79,7 +79,7 @@ class CodeInterpreterTool(BaseTool): Install missing libraries in the Docker container """ for library in libraries: - container.exec_run(f"pip install {library}") + container.exec_run(["pip", "install", library]) def _init_docker_container(self) -> docker.models.containers.Container: container_name = "code-interpreter" @@ -108,8 +108,7 @@ class CodeInterpreterTool(BaseTool): container = self._init_docker_container() self._install_libraries(container, libraries_used) - cmd_to_run = f'python3 -c "{code}"' - exec_result = container.exec_run(cmd_to_run) + exec_result = container.exec_run(["python3", "-c", code]) container.stop() container.remove() From 331840e6cc96567d442eb9ac4a88594eb28a7c76 Mon Sep 17 00:00:00 2001 From: Tom Mahler Date: Tue, 24 Dec 2024 12:17:57 +0200 Subject: [PATCH 50/69] improved imports from docker for better type hinting --- .../code_interpreter_tool.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index 9588ace1e..1809dcdda 100644 --- a/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -2,7 +2,9 @@ import importlib.util import os from typing import List, Optional, Type -import docker +from docker import from_env as docker_from_env +from docker.models.containers import Container +from docker.errors import ImageNotFound, NotFound from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -39,12 +41,12 @@ class CodeInterpreterTool(BaseTool): """ Verify if the Docker image is available. Optionally use a user-provided Dockerfile. 
""" - client = docker.from_env() + client = docker_from_env() try: client.images.get(self.default_image_tag) - except docker.errors.ImageNotFound: + except ImageNotFound: if self.user_dockerfile_path and os.path.exists(self.user_dockerfile_path): dockerfile_path = self.user_dockerfile_path else: @@ -73,7 +75,7 @@ class CodeInterpreterTool(BaseTool): return self.run_code_in_docker(code, libraries_used) def _install_libraries( - self, container: docker.models.containers.Container, libraries: List[str] + self, container: Container, libraries: List[str] ) -> None: """ Install missing libraries in the Docker container @@ -81,9 +83,9 @@ class CodeInterpreterTool(BaseTool): for library in libraries: container.exec_run(["pip", "install", library]) - def _init_docker_container(self) -> docker.models.containers.Container: + def _init_docker_container(self) -> Container: container_name = "code-interpreter" - client = docker.from_env() + client = docker_from_env() current_path = os.getcwd() # Check if the container is already running @@ -91,7 +93,7 @@ class CodeInterpreterTool(BaseTool): existing_container = client.containers.get(container_name) existing_container.stop() existing_container.remove() - except docker.errors.NotFound: + except NotFound: pass # Container does not exist, no need to remove return client.containers.run( From 64b98667a35f643f2a8bbedce1acb0bde4541b6f Mon Sep 17 00:00:00 2001 From: Tom Mahler Date: Tue, 24 Dec 2024 14:13:51 +0200 Subject: [PATCH 51/69] fixed code interpreter tests --- tests/tools/test_code_interpreter_tool.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/tools/test_code_interpreter_tool.py b/tests/tools/test_code_interpreter_tool.py index a9ffb9dbc..c45014e91 100644 --- a/tests/tools/test_code_interpreter_tool.py +++ b/tests/tools/test_code_interpreter_tool.py @@ -7,30 +7,30 @@ from crewai_tools.tools.code_interpreter_tool.code_interpreter_tool import ( class TestCodeInterpreterTool(unittest.TestCase): - @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker") + @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker_from_env") def test_run_code_in_docker(self, docker_mock): tool = CodeInterpreterTool() code = "print('Hello, World!')" - libraries_used = "numpy,pandas" + libraries_used = ["numpy", "pandas"] expected_output = "Hello, World!\n" - docker_mock.from_env().containers.run().exec_run().exit_code = 0 - docker_mock.from_env().containers.run().exec_run().output = ( + docker_mock().containers.run().exec_run().exit_code = 0 + docker_mock().containers.run().exec_run().output = ( expected_output.encode() ) result = tool.run_code_in_docker(code, libraries_used) self.assertEqual(result, expected_output) - @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker") + @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker_from_env") def test_run_code_in_docker_with_error(self, docker_mock): tool = CodeInterpreterTool() code = "print(1/0)" - libraries_used = "numpy,pandas" + libraries_used = ["numpy", "pandas"] expected_output = "Something went wrong while running the code: \nZeroDivisionError: division by zero\n" - docker_mock.from_env().containers.run().exec_run().exit_code = 1 - docker_mock.from_env().containers.run().exec_run().output = ( + docker_mock().containers.run().exec_run().exit_code = 1 + docker_mock().containers.run().exec_run().output = ( b"ZeroDivisionError: division by zero\n" ) result = tool.run_code_in_docker(code, 
libraries_used) From ba8f95964f5f967e543e44db8caaf49cb364f07b Mon Sep 17 00:00:00 2001 From: Tom Mahler Date: Tue, 24 Dec 2024 14:14:09 +0200 Subject: [PATCH 52/69] added unit testing for multi-line output --- tests/tools/test_code_interpreter_tool.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/tools/test_code_interpreter_tool.py b/tests/tools/test_code_interpreter_tool.py index c45014e91..6470c9dc1 100644 --- a/tests/tools/test_code_interpreter_tool.py +++ b/tests/tools/test_code_interpreter_tool.py @@ -36,3 +36,18 @@ class TestCodeInterpreterTool(unittest.TestCase): result = tool.run_code_in_docker(code, libraries_used) self.assertEqual(result, expected_output) + + @patch("crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.docker_from_env") + def test_run_code_in_docker_with_script(self, docker_mock): + tool = CodeInterpreterTool() + code = """print("This is line 1") +print("This is line 2")""" + libraries_used = [] # No additional libraries needed for this test + expected_output = "This is line 1\nThis is line 2\n" + + # Mock Docker responses + docker_mock().containers.run().exec_run().exit_code = 0 + docker_mock().containers.run().exec_run().output = expected_output.encode() + + result = tool.run_code_in_docker(code, libraries_used) + self.assertEqual(result, expected_output) From c7c8cd0a3cdb52234ec593f89f760e574fc36c41 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 00:54:49 +0000 Subject: [PATCH 53/69] feat: add URL validation and return_html examples - Add comprehensive URL validation in schema and _create_driver - Add URL format, length, and character validation - Add meaningful error messages for validation failures - Add return_html usage examples in README.md Co-Authored-By: Joe Moura --- .../tools/selenium_scraping_tool/README.md | 10 ++++++ .../selenium_scraping_tool.py | 36 +++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/crewai_tools/tools/selenium_scraping_tool/README.md b/src/crewai_tools/tools/selenium_scraping_tool/README.md index e2ddefba1..2d54eb970 100644 --- a/src/crewai_tools/tools/selenium_scraping_tool/README.md +++ b/src/crewai_tools/tools/selenium_scraping_tool/README.md @@ -24,6 +24,16 @@ tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.mai # Example 4: Scrape using optional parameters for customized scraping tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.main-content', cookie={'name': 'user', 'value': 'John Doe'}) + +# Example 5: Scrape content in HTML format +tool = SeleniumScrapingTool(website_url='https://example.com', return_html=True) +result = tool._run() +# Returns HTML content like: ['
<div>Hello World</div>
', ''] + +# Example 6: Scrape content in text format (default) +tool = SeleniumScrapingTool(website_url='https://example.com', return_html=False) +result = tool._run() +# Returns text content like: ['Hello World', 'Copyright 2024'] ``` ## Arguments diff --git a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py index 5f7d9391b..d7a55428d 100644 --- a/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py +++ b/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py @@ -1,8 +1,10 @@ +import re import time from typing import Any, Optional, Type +from urllib.parse import urlparse from crewai.tools import BaseTool -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, validator from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By @@ -15,12 +17,35 @@ class FixedSeleniumScrapingToolSchema(BaseModel): class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema): """Input for SeleniumScrapingTool.""" - website_url: str = Field(..., description="Mandatory website url to read the file") + website_url: str = Field(..., description="Mandatory website url to read the file. Must start with http:// or https://") css_element: str = Field( ..., description="Mandatory css reference for element to scrape from the website", ) + @validator('website_url') + def validate_website_url(cls, v): + if not v: + raise ValueError("Website URL cannot be empty") + + if len(v) > 2048: # Common maximum URL length + raise ValueError("URL is too long (max 2048 characters)") + + if not re.match(r'^https?://', v): + raise ValueError("URL must start with http:// or https://") + + try: + result = urlparse(v) + if not all([result.scheme, result.netloc]): + raise ValueError("Invalid URL format") + except Exception as e: + raise ValueError(f"Invalid URL: {str(e)}") + + if re.search(r'\s', v): + raise ValueError("URL cannot contain whitespace") + + return v + class SeleniumScrapingTool(BaseTool): name: str = "Read a website content" @@ -103,6 +128,13 @@ class SeleniumScrapingTool(BaseTool): return elements_content def _create_driver(self, url, cookie, wait_time): + if not url: + raise ValueError("URL cannot be empty") + + # Validate URL format + if not re.match(r'^https?://', url): + raise ValueError("URL must start with http:// or https://") + options = Options() options.add_argument("--headless") driver = self.driver(options=options) From c3ebbba8aefdc5d5c6cf0be1ab855720cc2e29d5 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Sat, 28 Dec 2024 09:11:32 +0100 Subject: [PATCH 54/69] Update __init__.py --- src/crewai_tools/tools/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 23565dbea..00f992833 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -31,6 +31,7 @@ from .rag.rag_tool import RagTool from .scrape_element_from_website.scrape_element_from_website import ( ScrapeElementFromWebsiteTool, ) +from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import ScrapeGraphScrapeTool, ScrapegraphScrapeToolSchema from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ( ScrapflyScrapeWebsiteTool, From e5c47e46a8fa9b078f13f12f5973955536c4033f Mon Sep 17 00:00:00 2001 From: juliette_sivan Date: 
Sat, 28 Dec 2024 10:59:06 -0500 Subject: [PATCH 55/69] add import tools --- src/crewai_tools/__init__.py | 1 + src/crewai_tools/tools/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 12523a214..68e778006 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -17,6 +17,7 @@ from .tools import ( FirecrawlSearchTool, GithubSearchTool, JSONSearchTool, + LinkupSearchTool, LlamaIndexTool, MDXSearchTool, MultiOnTool, diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 23565dbea..67c9c79e7 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -20,6 +20,7 @@ from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool from .github_search_tool.github_search_tool import GithubSearchTool from .json_search_tool.json_search_tool import JSONSearchTool +from .linkup_search_tool.linkup_search_tool import LinkupSearchTool from .llamaindex_tool.llamaindex_tool import LlamaIndexTool from .mdx_seach_tool.mdx_search_tool import MDXSearchTool from .multion_tool.multion_tool import MultiOnTool From 63e23c06c56e3ba2217b3ada35bb5af596dcfedf Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 21:55:35 +0000 Subject: [PATCH 56/69] Fix FileReadTool infinite loop by maintaining original schema Co-Authored-By: Joe Moura --- src/crewai_tools/tools/file_read_tool/file_read_tool.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py index fe34c9d8b..8a6c2e2d8 100644 --- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -27,7 +27,6 @@ class FileReadTool(BaseTool): if file_path is not None: self.file_path = file_path self.description = f"A tool that can be used to read {file_path}'s content." - self.args_schema = FixedFileReadToolSchema self._generate_description() def _run( From 5e2c38c34933aba3cfd91106a58b26d13d98545c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 22:26:37 +0000 Subject: [PATCH 57/69] Improve FileReadTool error handling and validation Co-Authored-By: Joe Moura --- .../tools/file_read_tool/file_read_tool.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 8a6c2e2d8..32db13f21 100644 --- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -4,13 +4,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -class FixedFileReadToolSchema(BaseModel): - """Input for FileReadTool.""" - - pass - - -class FileReadToolSchema(FixedFileReadToolSchema): +class FileReadToolSchema(BaseModel): """Input for FileReadTool.""" file_path: str = Field(..., description="Mandatory file full path to read the file") @@ -33,9 +27,16 @@ class FileReadTool(BaseTool): self, **kwargs: Any, ) -> Any: + file_path = kwargs.get("file_path", self.file_path) + if file_path is None: + return "Error: No file path provided. Please provide a file path either in the constructor or as an argument." 
+ try: - file_path = kwargs.get("file_path", self.file_path) with open(file_path, "r") as file: return file.read() + except FileNotFoundError: + return f"Error: File not found at path: {file_path}" + except PermissionError: + return f"Error: Permission denied when trying to read file: {file_path}" except Exception as e: - return f"Fail to read the file {file_path}. Error: {e}" + return f"Error: Failed to read file {file_path}. {str(e)}" From aaf2641cc82e03324ce19c288c256269c5c18042 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 22:29:46 +0000 Subject: [PATCH 58/69] Add comprehensive tests for FileReadTool Co-Authored-By: Joe Moura --- tests/file_read_tool_test.py | 84 ++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 tests/file_read_tool_test.py diff --git a/tests/file_read_tool_test.py b/tests/file_read_tool_test.py new file mode 100644 index 000000000..4646df24c --- /dev/null +++ b/tests/file_read_tool_test.py @@ -0,0 +1,84 @@ +import os +import pytest +from crewai_tools import FileReadTool + +def test_file_read_tool_constructor(): + """Test FileReadTool initialization with file_path.""" + # Create a temporary test file + test_file = "/tmp/test_file.txt" + test_content = "Hello, World!" + with open(test_file, "w") as f: + f.write(test_content) + + # Test initialization with file_path + tool = FileReadTool(file_path=test_file) + assert tool.file_path == test_file + assert "test_file.txt" in tool.description + + # Clean up + os.remove(test_file) + +def test_file_read_tool_run(): + """Test FileReadTool _run method with file_path at runtime.""" + # Create a temporary test file + test_file = "/tmp/test_file.txt" + test_content = "Hello, World!" 
+ with open(test_file, "w") as f: + f.write(test_content) + + # Test reading file with runtime file_path + tool = FileReadTool() + result = tool._run(file_path=test_file) + assert result == test_content + + # Clean up + os.remove(test_file) + +def test_file_read_tool_error_handling(): + """Test FileReadTool error handling.""" + # Test missing file path + tool = FileReadTool() + result = tool._run() + assert "Error: No file path provided" in result + + # Test non-existent file + result = tool._run(file_path="/nonexistent/file.txt") + assert "Error: File not found at path:" in result + + # Test permission error (create a file without read permissions) + test_file = "/tmp/no_permission.txt" + with open(test_file, "w") as f: + f.write("test") + os.chmod(test_file, 0o000) + + result = tool._run(file_path=test_file) + assert "Error: Permission denied" in result + + # Clean up + os.chmod(test_file, 0o666) # Restore permissions to delete + os.remove(test_file) + +def test_file_read_tool_constructor_and_run(): + """Test FileReadTool using both constructor and runtime file paths.""" + # Create two test files + test_file1 = "/tmp/test1.txt" + test_file2 = "/tmp/test2.txt" + content1 = "File 1 content" + content2 = "File 2 content" + + with open(test_file1, "w") as f1, open(test_file2, "w") as f2: + f1.write(content1) + f2.write(content2) + + # Test that constructor file_path works + tool = FileReadTool(file_path=test_file1) + result = tool._run() + assert result == content1 + + # Test that runtime file_path overrides constructor + result = tool._run(file_path=test_file2) + assert result == content2 + + # Clean up + os.remove(test_file1) + os.remove(test_file2) From d3391d9ba4c1b3696dbbe7188aa59e6dc6ce8761 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 23:10:51 +0000 Subject: [PATCH 59/69] Add comprehensive documentation and type hints to FileReadTool Co-Authored-By: Joe Moura --- .../tools/file_read_tool/file_read_tool.py | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 32db13f21..323a26d51 100644 --- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -11,22 +11,49 @@ class FileReadToolSchema(BaseModel): class FileReadTool(BaseTool): + """A tool for reading file contents. + + This tool inherits its schema handling from BaseTool to avoid recursive schema + definition issues. The args_schema is set to FileReadToolSchema which defines + the required file_path parameter. The schema should not be overridden in the + constructor as it would break the inheritance chain and cause infinite loops. + + The tool supports two ways of specifying the file path: + 1. At construction time via the file_path parameter + 2. At runtime via the file_path parameter in the tool's input + + Args: + file_path (Optional[str]): Path to the file to be read. If provided, + this becomes the default file path for the tool. + **kwargs: Additional keyword arguments passed to BaseTool. + + Example: + >>> tool = FileReadTool(file_path="/path/to/file.txt") + >>> content = tool.run() # Reads /path/to/file.txt + >>> content = tool.run(file_path="/path/to/other.txt") # Reads other.txt + """ name: str = "Read a file's content" - description: str = "A tool that can be used to read a file's content." 
+ description: str = "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read." args_schema: Type[BaseModel] = FileReadToolSchema file_path: Optional[str] = None - def __init__(self, file_path: Optional[str] = None, **kwargs): + def __init__(self, file_path: Optional[str] = None, **kwargs: Any) -> None: + """Initialize the FileReadTool. + + Args: + file_path (Optional[str]): Path to the file to be read. If provided, + this becomes the default file path for the tool. + **kwargs: Additional keyword arguments passed to BaseTool. + """ super().__init__(**kwargs) if file_path is not None: self.file_path = file_path - self.description = f"A tool that can be used to read {file_path}'s content." - self._generate_description() + self.description = f"A tool that reads file content. The default file is {file_path}, but you can provide a different 'file_path' parameter to read another file." def _run( self, **kwargs: Any, - ) -> Any: + ) -> str: file_path = kwargs.get("file_path", self.file_path) if file_path is None: return "Error: No file path provided. Please provide a file path either in the constructor or as an argument." @@ -40,3 +67,15 @@ class FileReadTool(BaseTool): return f"Error: Permission denied when trying to read file: {file_path}" except Exception as e: return f"Error: Failed to read file {file_path}. {str(e)}" + + def _generate_description(self) -> None: + """Generate the tool description based on file path. + + This method updates the tool's description to include information about + the default file path while maintaining the ability to specify a different + file at runtime. + + Returns: + None + """ + self.description = f"A tool that can be used to read {self.file_path}'s content." 
From 029afd3e145030ed6a0d0141a899beaa75311099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Sun, 29 Dec 2024 12:23:08 -0300 Subject: [PATCH 60/69] Update __init__.py --- src/crewai_tools/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 87aca8531..65a90a01b 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -26,6 +26,8 @@ from .tools import ( PGSearchTool, RagTool, ScrapeElementFromWebsiteTool, + ScrapeGraphScrapeTool, + ScrapegraphScrapeToolSchema ScrapeWebsiteTool, ScrapflyScrapeWebsiteTool, SeleniumScrapingTool, From 4c7ce3a94548d7768d676fb236bb1b1fb72dfbbe Mon Sep 17 00:00:00 2001 From: Priyanshupareek <37779762+Priyanshupareek@users.noreply.github.com> Date: Thu, 2 Jan 2025 00:54:48 +0530 Subject: [PATCH 61/69] Update browserbase_load_tool.py --- .../browserbase_load_tool/browserbase_load_tool.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py index 54c33db3c..0a848fc7b 100644 --- a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py +++ b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py @@ -1,8 +1,9 @@ from typing import Any, Optional, Type - -from crewai.tools import BaseTool +import os from pydantic import BaseModel, Field +from crewai_tools.tools.base_tool import BaseTool + class BrowserbaseLoadToolSchema(BaseModel): url: str = Field(description="Website URL") @@ -14,8 +15,8 @@ class BrowserbaseLoadTool(BaseTool): "Load webpages url in a headless browser using Browserbase and return the contents" ) args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema - api_key: Optional[str] = None - project_id: Optional[str] = None + api_key: Optional[str] = os.getenv('BROWSERBASE_API_KEY') + project_id: Optional[str] = os.getenv('BROWSERBASE_PROJECT_ID') text_content: Optional[bool] = False session_id: Optional[str] = None proxy: Optional[bool] = None @@ -38,7 +39,7 @@ class BrowserbaseLoadTool(BaseTool): "`browserbase` package not found, please run `pip install browserbase`" ) - self.browserbase = Browserbase(api_key, project_id) + self.browserbase = Browserbase(api_key=self.api_key) self.text_content = text_content self.session_id = session_id self.proxy = proxy From 954dd43c17cc4dde498ad90bf31612d3e5e95aa0 Mon Sep 17 00:00:00 2001 From: Priyanshupareek <37779762+Priyanshupareek@users.noreply.github.com> Date: Thu, 2 Jan 2025 01:34:42 +0530 Subject: [PATCH 62/69] Update browserbase_load_tool.py --- .../tools/browserbase_load_tool/browserbase_load_tool.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py index 0a848fc7b..95e4084fd 100644 --- a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py +++ b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py @@ -1,5 +1,5 @@ -from typing import Any, Optional, Type import os +from typing import Any, Optional, Type from pydantic import BaseModel, Field from crewai_tools.tools.base_tool import BaseTool @@ -32,6 +32,8 @@ class BrowserbaseLoadTool(BaseTool): **kwargs, ): super().__init__(**kwargs) + if not self.api_key: + raise EnvironmentError("BROWSERBASE_API_KEY environment variable is required for initialization") try: 
from browserbase import Browserbase # type: ignore except ImportError: From e0c6ec5bd316c9c373826dd15c34b34664bdd84e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Thu, 2 Jan 2025 20:51:14 -0300 Subject: [PATCH 63/69] fix imports --- src/crewai_tools/__init__.py | 4 ++-- src/crewai_tools/tools/__init__.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index ba779e5ac..890dc36f8 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -27,8 +27,8 @@ from .tools import ( PGSearchTool, RagTool, ScrapeElementFromWebsiteTool, - ScrapeGraphScrapeTool, - ScrapegraphScrapeToolSchema + ScrapegraphScrapeTool, + ScrapegraphScrapeToolSchema, ScrapeWebsiteTool, ScrapflyScrapeWebsiteTool, SeleniumScrapingTool, diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index d6faccc98..c8ee55084 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -20,7 +20,7 @@ from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import ( from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool from .github_search_tool.github_search_tool import GithubSearchTool from .json_search_tool.json_search_tool import JSONSearchTool -from .linkup_search_tool.linkup_search_tool import LinkupSearchTool +from .linkup.linkup_search_tool import LinkupSearchTool from .llamaindex_tool.llamaindex_tool import LlamaIndexTool from .mdx_seach_tool.mdx_search_tool import MDXSearchTool from .multion_tool.multion_tool import MultiOnTool @@ -32,7 +32,7 @@ from .rag.rag_tool import RagTool from .scrape_element_from_website.scrape_element_from_website import ( ScrapeElementFromWebsiteTool, ) -from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import ScrapeGraphScrapeTool, ScrapegraphScrapeToolSchema +from .scrapegraph_scrape_tool.scrapegraph_scrape_tool import ScrapegraphScrapeTool, ScrapegraphScrapeToolSchema from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import ( ScrapflyScrapeWebsiteTool, @@ -55,4 +55,4 @@ from .youtube_channel_search_tool.youtube_channel_search_tool import ( from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool from .weaviate_tool.vector_search import WeaviateVectorSearchTool from .serpapi_tool.serpapi_google_search_tool import SerpApiGoogleSearchTool -from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool \ No newline at end of file +from .serpapi_tool.serpapi_google_shopping_tool import SerpApiGoogleShoppingTool From 16cdabbf3513abc2333c595cf67996ab0f25a13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Fri, 3 Jan 2025 02:53:24 -0300 Subject: [PATCH 64/69] bumping verison fixing tests --- .../browserbase_load_tool.py | 2 +- tests/base_tool_test.py | 19 ++++++++++--------- tests/spider_tool_test.py | 12 ++++++------ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py index 95e4084fd..2ca1b95fc 100644 --- a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py +++ b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py @@ -2,7 +2,7 @@ import os from typing import Any, Optional, Type from pydantic import BaseModel, Field -from crewai_tools.tools.base_tool 
import BaseTool +from crewai.tools import BaseTool class BrowserbaseLoadToolSchema(BaseModel): diff --git a/tests/base_tool_test.py b/tests/base_tool_test.py index 949a445c2..4a4e40783 100644 --- a/tests/base_tool_test.py +++ b/tests/base_tool_test.py @@ -1,5 +1,6 @@ from typing import Callable -from crewai_tools import BaseTool, tool +from crewai.tools import BaseTool, tool +from crewai.tools.base_tool import to_langchain def test_creating_a_tool_using_annotation(): @tool("Name of my tool") @@ -9,14 +10,14 @@ def test_creating_a_tool_using_annotation(): # Assert all the right attributes were defined assert my_tool.name == "Name of my tool" - assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." + assert my_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it." assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} assert my_tool.func("What is the meaning of life?") == "What is the meaning of life?" # Assert the langchain tool conversion worked as expected - converted_tool = my_tool.to_langchain() + converted_tool = to_langchain([my_tool])[0] assert converted_tool.name == "Name of my tool" - assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." + assert converted_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it." assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} assert converted_tool.func("What is the meaning of life?") == "What is the meaning of life?" @@ -31,16 +32,16 @@ def test_creating_a_tool_using_baseclass(): my_tool = MyCustomTool() # Assert all the right attributes were defined assert my_tool.name == "Name of my tool" - assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." + assert my_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it." assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} - assert my_tool.run("What is the meaning of life?") == "What is the meaning of life?" + assert my_tool._run("What is the meaning of life?") == "What is the meaning of life?" # Assert the langchain tool conversion worked as expected - converted_tool = my_tool.to_langchain() + converted_tool = to_langchain([my_tool])[0] assert converted_tool.name == "Name of my tool" - assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." 
+ assert converted_tool.description == "Tool Name: Name of my tool\nTool Arguments: {'question': {'description': None, 'type': 'str'}}\nTool Description: Clear description for what this tool is useful for, you agent will need this information to use it." assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} - assert converted_tool.run("What is the meaning of life?") == "What is the meaning of life?" + assert converted_tool.invoke({"question": "What is the meaning of life?"}) == "What is the meaning of life?" def test_setting_cache_function(): class MyCustomTool(BaseTool): diff --git a/tests/spider_tool_test.py b/tests/spider_tool_test.py index 977dd8769..264394777 100644 --- a/tests/spider_tool_test.py +++ b/tests/spider_tool_test.py @@ -3,7 +3,7 @@ from crewai import Agent, Task, Crew def test_spider_tool(): spider_tool = SpiderTool() - + searcher = Agent( role="Web Research Expert", goal="Find related information from specific URL's", @@ -12,7 +12,7 @@ def test_spider_tool(): verbose=True, cache=False ) - + choose_between_scrape_crawl = Task( description="Scrape the page of spider.cloud and return a summary of how fast it is", expected_output="spider.cloud is a fast scraping and crawling tool", @@ -34,13 +34,13 @@ def test_spider_tool(): crew = Crew( agents=[searcher], tasks=[ - choose_between_scrape_crawl, - return_metadata, + choose_between_scrape_crawl, + return_metadata, css_selector ], - verbose=2 + verbose=True ) - + crew.kickoff() if __name__ == "__main__": From 8047ee067cae2a416f9c3c1d3abde20e92452394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Fri, 3 Jan 2025 03:34:34 -0300 Subject: [PATCH 65/69] treating for uninstalled dependencies --- src/crewai_tools/tools/linkup/linkup_search_tool.py | 12 +++++++++++- src/crewai_tools/tools/spider_tool/spider_tool.py | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/crewai_tools/tools/linkup/linkup_search_tool.py b/src/crewai_tools/tools/linkup/linkup_search_tool.py index 8ddb81527..3106a7c0c 100644 --- a/src/crewai_tools/tools/linkup/linkup_search_tool.py +++ b/src/crewai_tools/tools/linkup/linkup_search_tool.py @@ -1,4 +1,9 @@ -from linkup import LinkupClient +try: + from linkup import LinkupClient + LINKUP_AVAILABLE = True +except ImportError: + LINKUP_AVAILABLE = False + from pydantic import PrivateAttr class LinkupSearchTool: @@ -10,6 +15,11 @@ class LinkupSearchTool: """ Initialize the tool with an API key. """ + if not LINKUP_AVAILABLE: + raise ImportError( + "The 'linkup' package is required to use the LinkupSearchTool. 
" + "Please install it with: uv add linkup" + ) self._client = LinkupClient(api_key=api_key) def _run(self, query: str, depth: str = "standard", output_type: str = "searchResults") -> dict: diff --git a/src/crewai_tools/tools/spider_tool/spider_tool.py b/src/crewai_tools/tools/spider_tool/spider_tool.py index 40959612f..87726f0bc 100644 --- a/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -90,7 +90,7 @@ class SpiderTool(BaseTool): self.spider = Spider(api_key=api_key) except ImportError: raise ImportError( - "`spider-client` package not found, please run `pip install spider-client`" + "`spider-client` package not found, please run `uv add spider-client`" ) except Exception as e: raise RuntimeError(f"Failed to initialize Spider client: {str(e)}") From fa901453feea21a380da749454d5e38bf56464ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Fri, 3 Jan 2025 04:15:39 -0300 Subject: [PATCH 66/69] new version --- .../tools/linkup/linkup_search_tool.py | 5 ++- .../tools/weaviate_tool/vector_search.py | 33 +++++++++++-------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/crewai_tools/tools/linkup/linkup_search_tool.py b/src/crewai_tools/tools/linkup/linkup_search_tool.py index 3106a7c0c..b172ad029 100644 --- a/src/crewai_tools/tools/linkup/linkup_search_tool.py +++ b/src/crewai_tools/tools/linkup/linkup_search_tool.py @@ -1,15 +1,18 @@ +from typing import Any + try: from linkup import LinkupClient LINKUP_AVAILABLE = True except ImportError: LINKUP_AVAILABLE = False + LinkupClient = Any # type placeholder when package is not available from pydantic import PrivateAttr class LinkupSearchTool: name: str = "Linkup Search Tool" description: str = "Performs an API call to Linkup to retrieve contextual information." - _client: LinkupClient = PrivateAttr() + _client: LinkupClient = PrivateAttr() # type: ignore def __init__(self, api_key: str): """ diff --git a/src/crewai_tools/tools/weaviate_tool/vector_search.py b/src/crewai_tools/tools/weaviate_tool/vector_search.py index ab80b6ce1..a9c7ce519 100644 --- a/src/crewai_tools/tools/weaviate_tool/vector_search.py +++ b/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -1,12 +1,20 @@ +from typing import Any, Type, Optional import os import json -import weaviate -from pydantic import BaseModel, Field -from typing import Type, Optional -from crewai.tools import BaseTool +try: + import weaviate + from weaviate.classes.config import Configure, Vectorizers + from weaviate.classes.init import Auth + WEAVIATE_AVAILABLE = True +except ImportError: + WEAVIATE_AVAILABLE = False + weaviate = Any # type placeholder + Configure = Any + Vectorizers = Any + Auth = Any -from weaviate.classes.config import Configure, Vectorizers -from weaviate.classes.init import Auth +from pydantic import BaseModel, Field +from crewai.tools import BaseTool class WeaviateToolSchema(BaseModel): @@ -51,14 +59,11 @@ class WeaviateVectorSearchTool(BaseTool): ) def _run(self, query: str) -> str: - """Search the Weaviate database - - Args: - query (str): The query to search retrieve relevant information from the Weaviate database. Pass only the query as a string, not the question. - - Returns: - str: The result of the search query - """ + if not WEAVIATE_AVAILABLE: + raise ImportError( + "The 'weaviate-client' package is required to use the WeaviateVectorSearchTool. 
" + "Please install it with: uv add weaviate-client" + ) if not self.weaviate_cluster_url or not self.weaviate_api_key: raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set") From 7efc092873aed3487e65eef94b81f2a0fa196479 Mon Sep 17 00:00:00 2001 From: Eduardo Chiarotti Date: Fri, 3 Jan 2025 08:47:52 -0300 Subject: [PATCH 67/69] fix: weaviate init parameters --- .../tools/weaviate_tool/vector_search.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/crewai_tools/tools/weaviate_tool/vector_search.py b/src/crewai_tools/tools/weaviate_tool/vector_search.py index a9c7ce519..fc5641009 100644 --- a/src/crewai_tools/tools/weaviate_tool/vector_search.py +++ b/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -1,10 +1,12 @@ -from typing import Any, Type, Optional -import os import json +import os +from typing import Any, Optional, Type + try: import weaviate from weaviate.classes.config import Configure, Vectorizers from weaviate.classes.init import Auth + WEAVIATE_AVAILABLE = True except ImportError: WEAVIATE_AVAILABLE = False @@ -14,6 +16,7 @@ except ImportError: Auth = Any from pydantic import BaseModel, Field + from crewai.tools import BaseTool @@ -34,16 +37,8 @@ class WeaviateVectorSearchTool(BaseTool): args_schema: Type[BaseModel] = WeaviateToolSchema query: Optional[str] = None - vectorizer: Optional[Vectorizers] = Field( - default=Configure.Vectorizer.text2vec_openai( - model="nomic-embed-text", - ) - ) - generative_model: Optional[str] = Field( - default=Configure.Generative.openai( - model="gpt-4o", - ), - ) + vectorizer: Optional[Vectorizers] = None + generative_model: Optional[str] = None collection_name: Optional[str] = None limit: Optional[int] = Field(default=3) headers: Optional[dict] = Field( @@ -58,6 +53,19 @@ class WeaviateVectorSearchTool(BaseTool): description="The API key for the Weaviate cluster", ) + def __init__(self, **kwargs): + super().__init__(**kwargs) + if WEAVIATE_AVAILABLE: + self.vectorizer = self.vectorizer or Configure.Vectorizer.text2vec_openai( + model="nomic-embed-text", + ) + self.generative_model = ( + self.generative_model + or Configure.Generative.openai( + model="gpt-4o", + ) + ) + def _run(self, query: str) -> str: if not WEAVIATE_AVAILABLE: raise ImportError( From c31a8d6ee2e5207ef88b4ef4e6c5dba5ebbca7d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Fri, 3 Jan 2025 09:58:58 -0300 Subject: [PATCH 68/69] fix --- src/crewai_tools/tools/weaviate_tool/vector_search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/crewai_tools/tools/weaviate_tool/vector_search.py b/src/crewai_tools/tools/weaviate_tool/vector_search.py index fc5641009..bfe80f966 100644 --- a/src/crewai_tools/tools/weaviate_tool/vector_search.py +++ b/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -36,7 +36,6 @@ class WeaviateVectorSearchTool(BaseTool): description: str = "A tool to search the Weaviate database for relevant information on internal documents." 
args_schema: Type[BaseModel] = WeaviateToolSchema query: Optional[str] = None - vectorizer: Optional[Vectorizers] = None generative_model: Optional[str] = None collection_name: Optional[str] = None From aafcf992ab4f8df96a994b73a126a085f695524f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Fri, 3 Jan 2025 10:03:53 -0300 Subject: [PATCH 69/69] fix weviate tool --- src/crewai_tools/tools/weaviate_tool/vector_search.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/crewai_tools/tools/weaviate_tool/vector_search.py b/src/crewai_tools/tools/weaviate_tool/vector_search.py index bfe80f966..14e10d7c5 100644 --- a/src/crewai_tools/tools/weaviate_tool/vector_search.py +++ b/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -40,9 +40,7 @@ class WeaviateVectorSearchTool(BaseTool): generative_model: Optional[str] = None collection_name: Optional[str] = None limit: Optional[int] = Field(default=3) - headers: Optional[dict] = Field( - default={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]} - ) + headers: Optional[dict] = None weaviate_cluster_url: str = Field( ..., description="The URL of the Weaviate cluster", @@ -55,6 +53,12 @@ class WeaviateVectorSearchTool(BaseTool): def __init__(self, **kwargs): super().__init__(**kwargs) if WEAVIATE_AVAILABLE: + openai_api_key = os.environ.get("OPENAI_API_KEY") + if not openai_api_key: + raise ValueError( + "OPENAI_API_KEY environment variable is required for WeaviateVectorSearchTool and it is mandatory to use the tool." + ) + self.headers = {"X-OpenAI-Api-Key": openai_api_key} self.vectorizer = self.vectorizer or Configure.Vectorizer.text2vec_openai( model="nomic-embed-text", )