Merge branch 'main' into jeroen-vet-patch-1

2026-01-09 08:08:32 +00:00 · 2024-05-02 02:26:48 -03:00
parent d4fc993f1e 843c982ba9
commit 5d64a5c41c
23 changed files with 232 additions and 42 deletions
--- a/src/crewai_tools/init.py
+++ b/src/crewai_tools/init.py
@@ -1,5 +1,6 @@
 from .tools.base_tool import BaseTool, Tool, tool
 from .tools import (
+	BrowserbaseLoadTool,
 	CodeDocsSearchTool,
 	CSVSearchTool,
 	DirectorySearchTool,
--- a/src/crewai_tools/tools/init.py
+++ b/src/crewai_tools/tools/init.py
@@ -1,3 +1,4 @@
+from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
 from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
 from .csv_search_tool.csv_search_tool import CSVSearchTool
 from .directory_search_tool.directory_search_tool import DirectorySearchTool
@@ -18,4 +19,4 @@ from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
 from .website_search.website_search_tool import WebsiteSearchTool
 from .xml_search_tool.xml_search_tool import XMLSearchTool
 from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
-from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
+from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
--- a/src/crewai_tools/tools/base_tool.py
+++ b/src/crewai_tools/tools/base_tool.py
@@ -20,7 +20,7 @@ class BaseTool(BaseModel, ABC):
    """The schema for the arguments that the tool accepts."""
    description_updated: bool = False
    """Flag to check if the description has been updated."""
-    cache_function: Optional[Callable] = lambda: True
+    cache_function: Optional[Callable] = lambda _args, _result: True
    """Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""

    @validator("args_schema", always=True, pre=True)
--- a/src/crewai_tools/tools/browserbase_load_tool/README.md
+++ b/src/crewai_tools/tools/browserbase_load_tool/README.md
@@ -0,0 +1,29 @@
+# BrowserbaseLoadTool
+
+## Description
+
+[Browserbase](https://browserbase.com) is a serverless platform for running headless browsers. It offers advanced debugging, session recordings, stealth mode, integrated proxies, and captcha solving.
+
+## Installation
+
+- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`).
+- Install the [Browserbase SDK](http://github.com/browserbase/python-sdk) along with the `crewai[tools]` package:
+
+```
+pip install browserbase 'crewai[tools]'
+```
+
+## Example
+
+Utilize the BrowserbaseLoadTool as follows to allow your agent to load websites:
+
+```python
+from crewai_tools import BrowserbaseLoadTool
+
+tool = BrowserbaseLoadTool()
+```
+
+## Arguments
+
+- `api_key`: Optional. Specifies the Browserbase API key. Defaults to the `BROWSERBASE_API_KEY` environment variable.
+- `text_content`: Optional. Load pages as readable text. Default is `False`.
--- a/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py
+++ b/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py
@@ -0,0 +1,29 @@
+from crewai_tools import BaseTool
+from typing import Optional, Any, Type
+from pydantic.v1 import BaseModel, Field
+
+class BrowserbaseLoadToolSchema(BaseModel):  # argument schema assigned to BrowserbaseLoadTool.args_schema
+    url: str = Field(description="Website URL")  # forwarded to `load_url` by BrowserbaseLoadTool._run
+
+class BrowserbaseLoadTool(BaseTool):
+    """Tool that loads a URL through the Browserbase SDK client."""
+    name: str = "Browserbase web load tool"
+    description: str = "Load webpages in a headless browser using Browserbase and return the contents"
+    args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
+    api_key: Optional[str] = None
+    text_content: Optional[bool] = False
+    browserbase: Optional[Any] = None
+
+    def __init__(self, api_key: Optional[str] = None, text_content: Optional[bool] = False, **kwargs):
+        super().__init__(**kwargs)
+        try:
+            from browserbase import Browserbase  # optional dependency, imported only when the tool is built
+        except ImportError as exc:
+            raise ImportError("`browserbase` package not found, please run `pip install browserbase`") from exc
+        self.api_key = api_key  # keep the declared field in sync with the constructor argument
+        self.browserbase = Browserbase(api_key=api_key)
+        self.text_content = text_content
+
+    def _run(self, url: str):
+        """Return whatever the Browserbase client's `load_url` yields for `url`, passing `text_content` through."""
+        return self.browserbase.load_url(url, text_content=self.text_content)
--- a/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py
+++ b/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py
@@ -34,6 +34,7 @@ class CodeDocsSearchTool(RagTool):
            self.add(docs_url)
            self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content."
            self.args_schema = FixedCodeDocsSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class CodeDocsSearchTool(RagTool):
    ) -> Any:
        if "docs_url" in kwargs:
            self.add(kwargs["docs_url"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py
+++ b/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py
@@ -34,6 +34,7 @@ class CSVSearchTool(RagTool):
            self.add(csv)
            self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content."
            self.args_schema = FixedCSVSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class CSVSearchTool(RagTool):
    ) -> Any:
        if "csv" in kwargs:
            self.add(kwargs["csv"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py
+++ b/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py
@@ -34,6 +34,7 @@ class DirectorySearchTool(RagTool):
            self.add(directory)
            self.description = f"A tool that can be used to semantic search a query the {directory} directory's content."
            self.args_schema = FixedDirectorySearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class DirectorySearchTool(RagTool):
    ) -> Any:
        if "directory" in kwargs:
            self.add(kwargs["directory"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py
+++ b/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py
@@ -34,6 +34,7 @@ class DOCXSearchTool(RagTool):
            self.add(docx)
            self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content."
            self.args_schema = FixedDOCXSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class DOCXSearchTool(RagTool):
    ) -> Any:
        if "docx" in kwargs:
            self.add(kwargs["docx"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/file_read_tool/file_read_tool.py
+++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py
@@ -2,32 +2,45 @@ from typing import Optional, Type, Any
 from pydantic.v1 import BaseModel, Field
 from ..base_tool import BaseTool

+
 class FixedFileReadToolSchema(BaseModel):
-	"""Input for FileReadTool."""
-	pass
+    """Input for FileReadTool."""
+    pass
+

 class FileReadToolSchema(FixedFileReadToolSchema):
-	"""Input for FileReadTool."""
-	file_path: str = Field(..., description="Mandatory file full path to read the file")
+    """Input for FileReadTool."""
+    file_path: str = Field(
+        ...,
+        description="Mandatory file full path to read the file"
+    )
+

 class FileReadTool(BaseTool):
-	name: str = "Read a file's content"
-	description: str = "A tool that can be used to read a file's content."
-	args_schema: Type[BaseModel] = FileReadToolSchema
-	file_path: Optional[str] = None
+    name: str = "Read a file's content"
+    description: str = "A tool that can be used to read a file's content."
+    args_schema: Type[BaseModel] = FileReadToolSchema
+    file_path: Optional[str] = None

-	def __init__(self, file_path: Optional[str] = None, **kwargs):
-		super().__init__(**kwargs)
-		if file_path is not None:
-			self.file_path = file_path
-			self.description = f"A tool that can be used to read {file_path}'s content."
-			self.args_schema = FixedFileReadToolSchema
-			self._generate_description()
+    def __init__(
+        self,
+        file_path: Optional[str] = None,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        if file_path is not None:
+            self.file_path = file_path
+            self.description = f"A tool that can be used to read {file_path}'s content."
+            self.args_schema = FixedFileReadToolSchema
+            self._generate_description()

-	def _run(
-		self,
-		**kwargs: Any,
-	) -> Any:
-		file_path = kwargs.get('file_path', self.file_path)
-		with open(file_path, 'r') as file:
-			return file.read()
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        try:
+            file_path = kwargs.get('file_path', self.file_path)
+            with open(file_path, 'r') as file:
+                return file.read()
+        except Exception as e:
+            return f"Fail to read the file {file_path}. Error: {e}"
--- a/src/crewai_tools/tools/github_search_tool/README.md
+++ b/src/crewai_tools/tools/github_search_tool/README.md
@@ -1,24 +1,25 @@
-# GitHubSearchTool
+# GithubSearchTool

 ## Description
-The GitHubSearchTool is a Read, Append, and Generate (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.
+The GithubSearchTool is a Read, Append, and Generate (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.

 ## Installation
-To use the GitHubSearchTool, first ensure the crewai_tools package is installed in your Python environment:
+To use the GithubSearchTool, first ensure the crewai_tools package is installed in your Python environment:

 ```shell
 pip install 'crewai[tools]'
 ```

-This command installs the necessary package to run the GitHubSearchTool along with any other tools included in the crewai_tools package.
+This command installs the necessary package to run the GithubSearchTool along with any other tools included in the crewai_tools package.

 ## Example
-Here’s how you can use the GitHubSearchTool to perform semantic searches within a GitHub repository:
+Here’s how you can use the GithubSearchTool to perform semantic searches within a GitHub repository:
 ```python
-from crewai_tools import GitHubSearchTool
+from crewai_tools import GithubSearchTool

 # Initialize the tool for semantic searches within a specific GitHub repository
-tool = GitHubSearchTool(
+tool = GithubSearchTool(
+    gh_token='...',
 	github_repo='https://github.com/example/repo',
 	content_types=['code', 'issue'] # Options: code, repo, pr, issue
 )
@@ -26,12 +27,14 @@ tool = GitHubSearchTool(
 # OR

 # Initialize the tool for semantic searches within a specific GitHub repository, so the agent can search any repository if it learns about during its execution
-tool = GitHubSearchTool(
+tool = GithubSearchTool(
+    gh_token='...',
 	content_types=['code', 'issue'] # Options: code, repo, pr, issue
 )
 ```

 ## Arguments
+- `gh_token` : The GitHub token used to authenticate the search. This is a mandatory field and allows the tool to access the GitHub API for conducting searches.
 - `github_repo` : The URL of the GitHub repository where the search will be conducted. This is a mandatory field and specifies the target repository for your search.
 - `content_types` : Specifies the types of content to include in your search. You must provide a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is mandatory and allows tailoring the search to specific content types within the GitHub repository.

@@ -40,7 +43,7 @@ tool = GitHubSearchTool(
 By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:

 ```python
-tool = GitHubSearchTool(
+tool = GithubSearchTool(
    config=dict(
        llm=dict(
            provider="ollama", # or google, openai, anthropic, llama2, ...
--- a/src/crewai_tools/tools/github_search_tool/github_search_tool.py
+++ b/src/crewai_tools/tools/github_search_tool/github_search_tool.py
@@ -21,13 +21,13 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema):
    github_repo: str = Field(..., description="Mandatory github you want to search")
    content_types: List[str] = Field(
        ...,
-        description="Mandatory content types you want to be inlcuded search, options: [code, repo, pr, issue]",
+        description="Mandatory content types you want to be included search, options: [code, repo, pr, issue]",
    )


 class GithubSearchTool(RagTool):
    name: str = "Search a github repo's content"
-    description: str = "A tool that can be used to semantic search a query from a github repo's content."
+    description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
    summarize: bool = False
    gh_token: str
    args_schema: Type[BaseModel] = GithubSearchToolSchema
@@ -36,18 +36,22 @@ class GithubSearchTool(RagTool):
    def __init__(self, github_repo: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if github_repo is not None:
-            self.add(github_repo)
-            self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content."
+            self.add(repo=github_repo)
+            self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
            self.args_schema = FixedGithubSearchToolSchema
+            self._generate_description()

    def add(
        self,
-        *args: Any,
+        repo: str,
+        content_types: List[str] | None = None,
        **kwargs: Any,
    ) -> None:
+        content_types = content_types or self.content_types
+
        kwargs["data_type"] = "github"
        kwargs["loader"] = GithubLoader(config={"token": self.gh_token})
-        super().add(*args, **kwargs)
+        super().add(f"repo:{repo} type:{','.join(content_types)}", **kwargs)

    def _before_run(
        self,
@@ -55,4 +59,13 @@ class GithubSearchTool(RagTool):
        **kwargs: Any,
    ) -> Any:
        if "github_repo" in kwargs:
-            self.add(kwargs["github_repo"])
+            self.add(
+                repo=kwargs["github_repo"], content_types=kwargs.get("content_types")
+            )
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/json_search_tool/json_search_tool.py
+++ b/src/crewai_tools/tools/json_search_tool/json_search_tool.py
@@ -34,6 +34,7 @@ class JSONSearchTool(RagTool):
            self.add(json_path)
            self.description = f"A tool that can be used to semantic search a query the {json_path} JSON's content."
            self.args_schema = FixedJSONSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class JSONSearchTool(RagTool):
    ) -> Any:
        if "json_path" in kwargs:
            self.add(kwargs["json_path"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py
+++ b/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py
@@ -34,6 +34,7 @@ class MDXSearchTool(RagTool):
            self.add(mdx)
            self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content."
            self.args_schema = FixedMDXSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class MDXSearchTool(RagTool):
    ) -> Any:
        if "mdx" in kwargs:
            self.add(kwargs["mdx"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py
+++ b/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py
@@ -33,6 +33,7 @@ class PDFSearchTool(RagTool):
            self.add(pdf)
            self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content."
            self.args_schema = FixedPDFSearchToolSchema
+            self._generate_description()

    def add(
        self,
--- a/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py
+++ b/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py
@@ -35,3 +35,10 @@ class PGSearchTool(RagTool):
        kwargs["data_type"] = "postgres"
        kwargs["loader"] = PostgresLoader(config=dict(url=self.db_uri))
        super().add(f"SELECT * FROM {table_name};", **kwargs)
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py
+++ b/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py
@@ -44,7 +44,12 @@ class ScrapeWebsiteTool(BaseTool):
 		**kwargs: Any,
 	) -> Any:
 		website_url = kwargs.get('website_url', self.website_url)
-		page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {})
+		page = requests.get(
+			website_url,
+			timeout=15,
+			headers=self.headers,
+			cookies=self.cookies if self.cookies else {}
+		)
 		parsed = BeautifulSoup(page.content, "html.parser")
 		text = parsed.get_text()
 		text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
--- a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py
+++ b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py
@@ -7,7 +7,7 @@ from pydantic.v1 import BaseModel, Field
 from crewai_tools.tools.base_tool import BaseTool

 class SerperDevToolSchema(BaseModel):
-	"""Input for TXTSearchTool."""
+	"""Input for SerperDevTool."""
 	search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")

 class SerperDevTool(BaseTool):
--- a/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py
+++ b/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py
@@ -34,6 +34,7 @@ class TXTSearchTool(RagTool):
            self.add(txt)
            self.description = f"A tool that can be used to semantic search a query the {txt} txt's content."
            self.args_schema = FixedTXTSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class TXTSearchTool(RagTool):
    ) -> Any:
        if "txt" in kwargs:
            self.add(kwargs["txt"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/website_search/website_search_tool.py
+++ b/src/crewai_tools/tools/website_search/website_search_tool.py
@@ -34,6 +34,7 @@ class WebsiteSearchTool(RagTool):
            self.add(website)
            self.description = f"A tool that can be used to semantic search a query from {website} website content."
            self.args_schema = FixedWebsiteSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class WebsiteSearchTool(RagTool):
    ) -> Any:
        if "website" in kwargs:
            self.add(kwargs["website"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py
+++ b/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py
@@ -34,6 +34,7 @@ class XMLSearchTool(RagTool):
            self.add(xml)
            self.description = f"A tool that can be used to semantic search a query the {xml} XML's content."
            self.args_schema = FixedXMLSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class XMLSearchTool(RagTool):
    ) -> Any:
        if "xml" in kwargs:
            self.add(kwargs["xml"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py
+++ b/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py
@@ -34,6 +34,7 @@ class YoutubeChannelSearchTool(RagTool):
            self.add(youtube_channel_handle)
            self.description = f"A tool that can be used to semantic search a query the {youtube_channel_handle} Youtube Channels content."
            self.args_schema = FixedYoutubeChannelSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -53,3 +54,10 @@ class YoutubeChannelSearchTool(RagTool):
    ) -> Any:
        if "youtube_channel_handle" in kwargs:
            self.add(kwargs["youtube_channel_handle"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)
--- a/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py
+++ b/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py
@@ -34,6 +34,7 @@ class YoutubeVideoSearchTool(RagTool):
            self.add(youtube_video_url)
            self.description = f"A tool that can be used to semantic search a query the {youtube_video_url} Youtube Video content."
            self.args_schema = FixedYoutubeVideoSearchToolSchema
+            self._generate_description()

    def add(
        self,
@@ -50,3 +51,10 @@ class YoutubeVideoSearchTool(RagTool):
    ) -> Any:
        if "youtube_video_url" in kwargs:
            self.add(kwargs["youtube_video_url"])
+
+    def _run(
+        self,
+        search_query: str,
+        **kwargs: Any,
+    ) -> Any:
+        return super()._run(query=search_query)