From ec97e15a3a3601311b98f254270315f5d7c843cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Thu, 29 Feb 2024 03:09:48 -0300 Subject: [PATCH] Adding new description generator --- src/crewai_tools/tools/base_tool.py | 9 +++++++++ .../code_docs_search_tool.py | 1 + .../tools/csv_search_tool/csv_search_tool.py | 1 + .../directory_read_tool.py | 1 + .../directory_search_tool.py | 1 + .../docx_search_tool/docx_search_tool.py | 1 + .../tools/file_read_tool/file_read_tool.py | 1 + .../github_search_tool/github_search_tool.py | 1 + .../json_search_tool/json_search_tool.py | 1 + .../tools/mdx_seach_tool/mdx_search_tool.py | 1 + .../tools/pdf_search_tool/pdf_search_tool.py | 1 + .../tools/pg_seach_tool/pg_search_tool.py | 1 + .../scrape_element_from_website.py | 19 ++++++++++++++----- .../scrape_website_tool.py | 19 ++++++++++++++----- .../tools/txt_search_tool/txt_search_tool.py | 1 + .../website_search/website_search_tool.py | 1 + .../tools/xml_search_tool/xml_search_tool.py | 1 + .../youtube_channel_search_tool.py | 1 + .../youtube_video_search_tool.py | 1 + 19 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/crewai_tools/tools/base_tool.py b/src/crewai_tools/tools/base_tool.py index dc679f833..2f19184ea 100644 --- a/src/crewai_tools/tools/base_tool.py +++ b/src/crewai_tools/tools/base_tool.py @@ -13,10 +13,12 @@ class BaseTool(BaseModel, ABC): """Used to tell the model how/when/why to use the tool.""" args_schema: Optional[Type[V1BaseModel]] = None """The schema for the arguments that the tool accepts.""" + description_updated: bool = False @model_validator(mode="after") def _check_args_schema(self): self._set_args_schema() + self._generate_description() return self def run( @@ -56,6 +58,13 @@ class BaseTool(BaseModel, ABC): }, }, ) + def _generate_description(self): + args = [] + for arg, attribute in self.args_schema.schema()['properties'].items(): + args.append(f"{arg}: '{attribute['type']}'") + + description = self.description.replace('\n', ' ') + self.description = f"{self.name}({', '.join(args)}) - {description}" class Tool(BaseTool): diff --git a/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py b/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py index fd0acf4ca..54ba69d01 100644 --- a/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py +++ b/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py @@ -28,6 +28,7 @@ class CodeDocsSearchTool(RagTool): self.docs_url = docs_url self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content." self.args_schema = FixedCodeDocsSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py b/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py index 8cc06e263..dcfdd82c8 100644 --- a/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py +++ b/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py @@ -28,6 +28,7 @@ class CSVSearchTool(RagTool): self.csv = csv self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content." self.args_schema = FixedCSVSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 94fcce076..8b569e5f6 100644 --- a/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -23,6 +23,7 @@ class DirectoryReadTool(BaseTool): self.directory = directory self.description = f"A tool that can be used to list {directory}'s content." self.args_schema = FixedDirectoryReadToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py b/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py index 39c34fc93..2cd888a8b 100644 --- a/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py +++ b/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py @@ -28,6 +28,7 @@ class DirectorySearchTool(RagTool): self.directory = directory self.description = f"A tool that can be used to semantic search a query the {directory} directory's content." self.args_schema = FixedDirectorySearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py b/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py index 1a52e5f3b..135837a6b 100644 --- a/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py +++ b/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py @@ -28,6 +28,7 @@ class DOCXSearchTool(RagTool): self.docx = docx self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content." self.args_schema = FixedDOCXSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 8c2e8dcca..8c7643852 100644 --- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -22,6 +22,7 @@ class FileReadTool(BaseTool): self.file_path = file_path self.description = f"A tool that can be used to read {file_path}'s content." self.args_schema = FixedFileReadToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/github_search_tool/github_search_tool.py b/src/crewai_tools/tools/github_search_tool/github_search_tool.py index 3b90f16ea..cb2815aad 100644 --- a/src/crewai_tools/tools/github_search_tool/github_search_tool.py +++ b/src/crewai_tools/tools/github_search_tool/github_search_tool.py @@ -31,6 +31,7 @@ class GithubSearchTool(RagTool): self.github_repo = github_repo self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content." self.args_schema = FixedGithubSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/json_search_tool/json_search_tool.py b/src/crewai_tools/tools/json_search_tool/json_search_tool.py index 89e515e78..578f06bc9 100644 --- a/src/crewai_tools/tools/json_search_tool/json_search_tool.py +++ b/src/crewai_tools/tools/json_search_tool/json_search_tool.py @@ -28,6 +28,7 @@ class JSONSearchTool(RagTool): self.json_path = json_path self.description = f"A tool that can be used to semantic search a query the {json} JSON's content." self.args_schema = FixedJSONSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py b/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py index 0f4deb056..e34c0fa08 100644 --- a/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py +++ b/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py @@ -28,6 +28,7 @@ class MDXSearchTool(RagTool): self.mdx = mdx self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content." self.args_schema = FixedMDXSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py b/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py index ba54e34ca..bb85673ba 100644 --- a/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py +++ b/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py @@ -28,6 +28,7 @@ class PDFSearchTool(RagTool): self.pdf = pdf self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content." self.args_schema = FixedPDFSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py b/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py index f625bebc9..8b9707185 100644 --- a/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py +++ b/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py @@ -24,6 +24,7 @@ class PGSearchTool(RagTool): if table_name is not None: self.table_name = table_name self.description = f"A tool that can be used to semantic search a query the {table_name} database table's content." + self._generate_description() else: raise('To use PGSearchTool, you must provide a `table_name` argument') diff --git a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index bee6c22ab..36bc088e5 100644 --- a/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -1,3 +1,4 @@ +import os import requests from bs4 import BeautifulSoup from typing import Optional, Type, Any @@ -18,20 +19,28 @@ class ScrapeElementFromWebsiteTool(BaseTool): description: str = "A tool that can be used to read a website content." args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema website_url: Optional[str] = None + cookies: Optional[dict] = None css_element: Optional[str] = None headers: Optional[dict] = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3', - 'Accept-Language': 'en-US,en;q=0.5', - 'Referer': 'https://www.google.com/' + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'Accept-Language': 'en-US,en;q=0.9', + 'Referer': 'https://www.google.com/', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Accept-Encoding': 'gzip, deflate, br' } - def __init__(self, website_url: Optional[str] = None, css_element: Optional[str] = None, **kwargs): + def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, css_element: Optional[str] = None, **kwargs): super().__init__(**kwargs) if website_url is not None: self.website_url = website_url self.css_element = css_element self.description = f"A tool that can be used to read {website_url}'s content." self.args_schema = FixedScrapeElementFromWebsiteToolSchema + self._generate_description() + if cookies is not None: + self.cookies = {cookies["name"]: os.getenv(cookies["value"])} def _run( self, @@ -39,7 +48,7 @@ class ScrapeElementFromWebsiteTool(BaseTool): ) -> Any: website_url = kwargs.get('website_url', self.website_url) css_element = kwargs.get('css_element', self.css_element) - page = requests.get(website_url, headers=self.headers) + page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {}) parsed = BeautifulSoup(page.content, "html.parser") elements = parsed.select(css_element) return "\n".join([element.get_text() for element in elements]) diff --git a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 240948a33..623d785dc 100644 --- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -1,3 +1,4 @@ +import os import requests from bs4 import BeautifulSoup from typing import Optional, Type, Any @@ -17,25 +18,33 @@ class ScrapeWebsiteTool(BaseTool): description: str = "A tool that can be used to read a website content." args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema website_url: Optional[str] = None + cookies: Optional[dict] = None headers: Optional[dict] = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3', - 'Accept-Language': 'en-US,en;q=0.5', - 'Referer': 'https://www.google.com/' + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'Accept-Language': 'en-US,en;q=0.9', + 'Referer': 'https://www.google.com/', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Accept-Encoding': 'gzip, deflate, br' } - def __init__(self, website_url: Optional[str] = None, **kwargs): + def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs): super().__init__(**kwargs) if website_url is not None: self.website_url = website_url self.description = f"A tool that can be used to read {website_url}'s content." self.args_schema = FixedScrapeWebsiteToolSchema + self._generate_description() + if cookies is not None: + self.cookies = {cookies["name"]: os.getenv(cookies["value"])} def _run( self, **kwargs: Any, ) -> Any: website_url = kwargs.get('website_url', self.website_url) - page = requests.get(website_url, headers=self.headers) + page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {}) parsed = BeautifulSoup(page.content, "html.parser") return parsed.get_text() diff --git a/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py b/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py index 130f6f164..0a61eae53 100644 --- a/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py +++ b/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py @@ -27,6 +27,7 @@ class TXTSearchTool(RagTool): self.txt = txt self.description = f"A tool that can be used to semantic search a query the {txt} txt's content." self.args_schema = FixedTXTSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/website_search/website_search_tool.py b/src/crewai_tools/tools/website_search/website_search_tool.py index f4cffa9c9..37744f2b6 100644 --- a/src/crewai_tools/tools/website_search/website_search_tool.py +++ b/src/crewai_tools/tools/website_search/website_search_tool.py @@ -28,6 +28,7 @@ class WebsiteSearchTool(RagTool): self.website = website self.description = f"A tool that can be used to semantic search a query from {website} website content." self.args_schema = FixedWebsiteSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py b/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py index 9259b819f..90cedfa56 100644 --- a/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py +++ b/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py @@ -28,6 +28,7 @@ class XMLSearchTool(RagTool): self.xml = xml self.description = f"A tool that can be used to semantic search a query the {xml} XML's content." self.args_schema = FixedXMLSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py b/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py index 9b4e51688..fcdfe78c9 100644 --- a/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py +++ b/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py @@ -28,6 +28,7 @@ class YoutubeChannelSearchTool(RagTool): self.youtube_channel_handle = youtube_channel_handle self.description = f"A tool that can be used to semantic search a query the {youtube_channel_handle} Youtube Channels content." self.args_schema = FixedYoutubeChannelSearchToolSchema + self._generate_description() def _run( self, diff --git a/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py b/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py index 7b26c8e90..20aa9691d 100644 --- a/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py +++ b/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py @@ -28,6 +28,7 @@ class YoutubeVideoSearchTool(RagTool): self.youtube_video_url = youtube_video_url self.description = f"A tool that can be used to semantic search a query the {youtube_video_url} Youtube Video content." self.args_schema = FixedYoutubeVideoSearchToolSchema + self._generate_description() def _run( self,