Merge branch 'main' into jeroen-vet-patch-1

This commit is contained in:
João Moura
2024-05-02 02:26:48 -03:00
committed by GitHub
23 changed files with 232 additions and 42 deletions

View File

@@ -1,5 +1,6 @@
from .tools.base_tool import BaseTool, Tool, tool from .tools.base_tool import BaseTool, Tool, tool
from .tools import ( from .tools import (
BrowserbaseLoadTool,
CodeDocsSearchTool, CodeDocsSearchTool,
CSVSearchTool, CSVSearchTool,
DirectorySearchTool, DirectorySearchTool,

View File

@@ -1,3 +1,4 @@
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .csv_search_tool.csv_search_tool import CSVSearchTool from .csv_search_tool.csv_search_tool import CSVSearchTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool from .directory_search_tool.directory_search_tool import DirectorySearchTool
@@ -18,4 +19,4 @@ from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .website_search.website_search_tool import WebsiteSearchTool from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool

View File

@@ -20,7 +20,7 @@ class BaseTool(BaseModel, ABC):
"""The schema for the arguments that the tool accepts.""" """The schema for the arguments that the tool accepts."""
description_updated: bool = False description_updated: bool = False
"""Flag to check if the description has been updated.""" """Flag to check if the description has been updated."""
cache_function: Optional[Callable] = lambda: True cache_function: Optional[Callable] = lambda _args, _result: True
"""Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached.""" """Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""
@validator("args_schema", always=True, pre=True) @validator("args_schema", always=True, pre=True)

View File

@@ -0,0 +1,29 @@
# BrowserbaseLoadTool
## Description
[Browserbase](https://browserbase.com) is a serverless platform for running headless browsers. It offers advanced debugging, session recordings, stealth mode, integrated proxies, and captcha solving.
## Installation
- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`).
- Install the [Browserbase SDK](http://github.com/browserbase/python-sdk) along with the `crewai[tools]` package:
```
pip install browserbase 'crewai[tools]'
```
## Example
Utilize the BrowserbaseLoadTool as follows to allow your agent to load websites:
```python
from crewai_tools import BrowserbaseLoadTool
tool = BrowserbaseLoadTool()
```
## Arguments
- `api_key`: Optional. Specifies the Browserbase API key. Defaults to the `BROWSERBASE_API_KEY` environment variable.
- `text_content`: Optional. Load pages as readable text. Default is `False`.

View File

@@ -0,0 +1,29 @@
from typing import Any, Optional, Type

from pydantic.v1 import BaseModel, Field

from crewai_tools import BaseTool
class BrowserbaseLoadToolSchema(BaseModel):
    # Argument schema for BrowserbaseLoadTool: the only input is the address
    # of the page to load.
    url: str = Field(
        description="Website URL",
    )
class BrowserbaseLoadTool(BaseTool):
    """Tool that loads a web page through a Browserbase headless browser.

    The `browserbase` SDK is imported lazily inside `__init__`, so the
    package is only required when the tool is actually instantiated.
    """

    name: str = "Browserbase web load tool"
    description: str = "Load webpages in a headless browser using Browserbase and return the contents"
    args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
    # API key handed to the Browserbase client. When None, the SDK is expected
    # to fall back to the BROWSERBASE_API_KEY environment variable (per the
    # tool's README) -- TODO confirm against the SDK.
    api_key: Optional[str] = None
    # Forwarded to `load_url` as its `text_content` argument on every run.
    text_content: Optional[bool] = False
    # Browserbase client instance; created in `__init__`.
    browserbase: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, text_content: Optional[bool] = False, **kwargs):
        """Create the tool and its Browserbase client.

        Args:
            api_key: Browserbase API key passed to the client constructor.
            text_content: Value forwarded as `text_content` to `load_url`.
            **kwargs: Passed through unchanged to the base tool constructor.

        Raises:
            ImportError: If the `browserbase` package is not installed.
        """
        super().__init__(**kwargs)
        try:
            # Imported here so that merely importing this module does not
            # require the optional `browserbase` dependency.
            from browserbase import Browserbase
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "`browserbase` package not found, please run `pip install browserbase`"
            ) from exc

        # Keep the declared `api_key` field in sync with the constructor
        # argument instead of leaving it permanently at None.
        self.api_key = api_key
        self.browserbase = Browserbase(api_key=api_key)
        self.text_content = text_content

    def _run(self, url: str):
        """Load `url` with the Browserbase client and return what it returns."""
        return self.browserbase.load_url(url, text_content=self.text_content)

View File

@@ -34,6 +34,7 @@ class CodeDocsSearchTool(RagTool):
self.add(docs_url) self.add(docs_url)
self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content." self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content."
self.args_schema = FixedCodeDocsSearchToolSchema self.args_schema = FixedCodeDocsSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class CodeDocsSearchTool(RagTool):
) -> Any: ) -> Any:
if "docs_url" in kwargs: if "docs_url" in kwargs:
self.add(kwargs["docs_url"]) self.add(kwargs["docs_url"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `docs_url`). They
            are forwarded instead of being dropped, so code in this class
            that checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class CSVSearchTool(RagTool):
self.add(csv) self.add(csv)
self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content." self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content."
self.args_schema = FixedCSVSearchToolSchema self.args_schema = FixedCSVSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class CSVSearchTool(RagTool):
) -> Any: ) -> Any:
if "csv" in kwargs: if "csv" in kwargs:
self.add(kwargs["csv"]) self.add(kwargs["csv"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `csv`). They are
            forwarded instead of being dropped, so code in this class that
            checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class DirectorySearchTool(RagTool):
self.add(directory) self.add(directory)
self.description = f"A tool that can be used to semantic search a query the {directory} directory's content." self.description = f"A tool that can be used to semantic search a query the {directory} directory's content."
self.args_schema = FixedDirectorySearchToolSchema self.args_schema = FixedDirectorySearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class DirectorySearchTool(RagTool):
) -> Any: ) -> Any:
if "directory" in kwargs: if "directory" in kwargs:
self.add(kwargs["directory"]) self.add(kwargs["directory"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `directory`). They
            are forwarded instead of being dropped, so code in this class
            that checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class DOCXSearchTool(RagTool):
self.add(docx) self.add(docx)
self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content." self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content."
self.args_schema = FixedDOCXSearchToolSchema self.args_schema = FixedDOCXSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class DOCXSearchTool(RagTool):
) -> Any: ) -> Any:
if "docx" in kwargs: if "docx" in kwargs:
self.add(kwargs["docx"]) self.add(kwargs["docx"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `docx`). They are
            forwarded instead of being dropped, so code in this class that
            checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -2,32 +2,45 @@ from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field from pydantic.v1 import BaseModel, Field
from ..base_tool import BaseTool from ..base_tool import BaseTool
class FixedFileReadToolSchema(BaseModel): class FixedFileReadToolSchema(BaseModel):
"""Input for FileReadTool.""" """Input for FileReadTool."""
pass pass
class FileReadToolSchema(FixedFileReadToolSchema): class FileReadToolSchema(FixedFileReadToolSchema):
"""Input for FileReadTool.""" """Input for FileReadTool."""
file_path: str = Field(..., description="Mandatory file full path to read the file") file_path: str = Field(
...,
description="Mandatory file full path to read the file"
)
class FileReadTool(BaseTool): class FileReadTool(BaseTool):
name: str = "Read a file's content" name: str = "Read a file's content"
description: str = "A tool that can be used to read a file's content." description: str = "A tool that can be used to read a file's content."
args_schema: Type[BaseModel] = FileReadToolSchema args_schema: Type[BaseModel] = FileReadToolSchema
file_path: Optional[str] = None file_path: Optional[str] = None
def __init__(self, file_path: Optional[str] = None, **kwargs): def __init__(
super().__init__(**kwargs) self,
if file_path is not None: file_path: Optional[str] = None,
self.file_path = file_path **kwargs
self.description = f"A tool that can be used to read {file_path}'s content." ):
self.args_schema = FixedFileReadToolSchema super().__init__(**kwargs)
self._generate_description() if file_path is not None:
self.file_path = file_path
self.description = f"A tool that can be used to read {file_path}'s content."
self.args_schema = FixedFileReadToolSchema
self._generate_description()
def _run( def _run(
self, self,
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
file_path = kwargs.get('file_path', self.file_path) try:
with open(file_path, 'r') as file: file_path = kwargs.get('file_path', self.file_path)
return file.read() with open(file_path, 'r') as file:
return file.read()
except Exception as e:
return f"Fail to read the file {file_path}. Error: {e}"

View File

@@ -1,24 +1,25 @@
# GitHubSearchTool # GithubSearchTool
## Description ## Description
The GitHubSearchTool is a Read, Append, and Generate (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub. The GithubSearchTool is a Read, Append, and Generate (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.
## Installation ## Installation
To use the GitHubSearchTool, first ensure the crewai_tools package is installed in your Python environment: To use the GithubSearchTool, first ensure the crewai_tools package is installed in your Python environment:
```shell ```shell
pip install 'crewai[tools]' pip install 'crewai[tools]'
``` ```
This command installs the necessary package to run the GitHubSearchTool along with any other tools included in the crewai_tools package. This command installs the necessary package to run the GithubSearchTool along with any other tools included in the crewai_tools package.
## Example ## Example
Heres how you can use the GitHubSearchTool to perform semantic searches within a GitHub repository: Heres how you can use the GithubSearchTool to perform semantic searches within a GitHub repository:
```python ```python
from crewai_tools import GitHubSearchTool from crewai_tools import GithubSearchTool
# Initialize the tool for semantic searches within a specific GitHub repository # Initialize the tool for semantic searches within a specific GitHub repository
tool = GitHubSearchTool( tool = GithubSearchTool(
gh_token='...',
github_repo='https://github.com/example/repo', github_repo='https://github.com/example/repo',
content_types=['code', 'issue'] # Options: code, repo, pr, issue content_types=['code', 'issue'] # Options: code, repo, pr, issue
) )
@@ -26,12 +27,14 @@ tool = GitHubSearchTool(
# OR # OR
# Initialize the tool for semantic searches within a specific GitHub repository, so the agent can search any repository if it learns about during its execution # Initialize the tool for semantic searches within a specific GitHub repository, so the agent can search any repository if it learns about during its execution
tool = GitHubSearchTool( tool = GithubSearchTool(
gh_token='...',
content_types=['code', 'issue'] # Options: code, repo, pr, issue content_types=['code', 'issue'] # Options: code, repo, pr, issue
) )
``` ```
## Arguments ## Arguments
- `gh_token` : The GitHub token used to authenticate the search. This is a mandatory field and allows the tool to access the GitHub API for conducting searches.
- `github_repo` : The URL of the GitHub repository where the search will be conducted. This is a mandatory field and specifies the target repository for your search. - `github_repo` : The URL of the GitHub repository where the search will be conducted. This is a mandatory field and specifies the target repository for your search.
- `content_types` : Specifies the types of content to include in your search. You must provide a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is mandatory and allows tailoring the search to specific content types within the GitHub repository. - `content_types` : Specifies the types of content to include in your search. You must provide a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is mandatory and allows tailoring the search to specific content types within the GitHub repository.
@@ -40,7 +43,7 @@ tool = GitHubSearchTool(
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
```python ```python
tool = GitHubSearchTool( tool = GithubSearchTool(
config=dict( config=dict(
llm=dict( llm=dict(
provider="ollama", # or google, openai, anthropic, llama2, ... provider="ollama", # or google, openai, anthropic, llama2, ...

View File

@@ -21,13 +21,13 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema):
github_repo: str = Field(..., description="Mandatory github you want to search") github_repo: str = Field(..., description="Mandatory github you want to search")
content_types: List[str] = Field( content_types: List[str] = Field(
..., ...,
description="Mandatory content types you want to be inlcuded search, options: [code, repo, pr, issue]", description="Mandatory content types you want to be included search, options: [code, repo, pr, issue]",
) )
class GithubSearchTool(RagTool): class GithubSearchTool(RagTool):
name: str = "Search a github repo's content" name: str = "Search a github repo's content"
description: str = "A tool that can be used to semantic search a query from a github repo's content." description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
summarize: bool = False summarize: bool = False
gh_token: str gh_token: str
args_schema: Type[BaseModel] = GithubSearchToolSchema args_schema: Type[BaseModel] = GithubSearchToolSchema
@@ -36,18 +36,22 @@ class GithubSearchTool(RagTool):
def __init__(self, github_repo: Optional[str] = None, **kwargs): def __init__(self, github_repo: Optional[str] = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
if github_repo is not None: if github_repo is not None:
self.add(github_repo) self.add(repo=github_repo)
self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content." self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
self.args_schema = FixedGithubSearchToolSchema self.args_schema = FixedGithubSearchToolSchema
self._generate_description()
def add( def add(
self, self,
*args: Any, repo: str,
content_types: List[str] | None = None,
**kwargs: Any, **kwargs: Any,
) -> None: ) -> None:
content_types = content_types or self.content_types
kwargs["data_type"] = "github" kwargs["data_type"] = "github"
kwargs["loader"] = GithubLoader(config={"token": self.gh_token}) kwargs["loader"] = GithubLoader(config={"token": self.gh_token})
super().add(*args, **kwargs) super().add(f"repo:{repo} type:{','.join(content_types)}", **kwargs)
def _before_run( def _before_run(
self, self,
@@ -55,4 +59,13 @@ class GithubSearchTool(RagTool):
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
if "github_repo" in kwargs: if "github_repo" in kwargs:
self.add(kwargs["github_repo"]) self.add(
repo=kwargs["github_repo"], content_types=kwargs.get("content_types")
)
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `github_repo` and
            `content_types`, which this class's `_before_run` reads). They
            are forwarded instead of being dropped so that hook can still
            see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to `_before_run` -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class JSONSearchTool(RagTool):
self.add(json_path) self.add(json_path)
self.description = f"A tool that can be used to semantic search a query the {json_path} JSON's content." self.description = f"A tool that can be used to semantic search a query the {json_path} JSON's content."
self.args_schema = FixedJSONSearchToolSchema self.args_schema = FixedJSONSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class JSONSearchTool(RagTool):
) -> Any: ) -> Any:
if "json_path" in kwargs: if "json_path" in kwargs:
self.add(kwargs["json_path"]) self.add(kwargs["json_path"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `json_path`). They
            are forwarded instead of being dropped, so code in this class
            that checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class MDXSearchTool(RagTool):
self.add(mdx) self.add(mdx)
self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content." self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content."
self.args_schema = FixedMDXSearchToolSchema self.args_schema = FixedMDXSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class MDXSearchTool(RagTool):
) -> Any: ) -> Any:
if "mdx" in kwargs: if "mdx" in kwargs:
self.add(kwargs["mdx"]) self.add(kwargs["mdx"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `mdx`). They are
            forwarded instead of being dropped, so code in this class that
            checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -33,6 +33,7 @@ class PDFSearchTool(RagTool):
self.add(pdf) self.add(pdf)
self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content." self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content."
self.args_schema = FixedPDFSearchToolSchema self.args_schema = FixedPDFSearchToolSchema
self._generate_description()
def add( def add(
self, self,

View File

@@ -35,3 +35,10 @@ class PGSearchTool(RagTool):
kwargs["data_type"] = "postgres" kwargs["data_type"] = "postgres"
kwargs["loader"] = PostgresLoader(config=dict(url=self.db_uri)) kwargs["loader"] = PostgresLoader(config=dict(url=self.db_uri))
super().add(f"SELECT * FROM {table_name};", **kwargs) super().add(f"SELECT * FROM {table_name};", **kwargs)
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments. They are forwarded to the
            parent instead of being silently dropped.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs -- confirm
    # against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -44,7 +44,12 @@ class ScrapeWebsiteTool(BaseTool):
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
website_url = kwargs.get('website_url', self.website_url) website_url = kwargs.get('website_url', self.website_url)
page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {}) page = requests.get(
website_url,
timeout=15,
headers=self.headers,
cookies=self.cookies if self.cookies else {}
)
parsed = BeautifulSoup(page.content, "html.parser") parsed = BeautifulSoup(page.content, "html.parser")
text = parsed.get_text() text = parsed.get_text()
text = '\n'.join([i for i in text.split('\n') if i.strip() != '']) text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])

View File

@@ -7,7 +7,7 @@ from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool from crewai_tools.tools.base_tool import BaseTool
class SerperDevToolSchema(BaseModel): class SerperDevToolSchema(BaseModel):
"""Input for TXTSearchTool.""" """Input for SerperDevTool."""
search_query: str = Field(..., description="Mandatory search query you want to use to search the internet") search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
class SerperDevTool(BaseTool): class SerperDevTool(BaseTool):

View File

@@ -34,6 +34,7 @@ class TXTSearchTool(RagTool):
self.add(txt) self.add(txt)
self.description = f"A tool that can be used to semantic search a query the {txt} txt's content." self.description = f"A tool that can be used to semantic search a query the {txt} txt's content."
self.args_schema = FixedTXTSearchToolSchema self.args_schema = FixedTXTSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class TXTSearchTool(RagTool):
) -> Any: ) -> Any:
if "txt" in kwargs: if "txt" in kwargs:
self.add(kwargs["txt"]) self.add(kwargs["txt"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `txt`). They are
            forwarded instead of being dropped, so code in this class that
            checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class WebsiteSearchTool(RagTool):
self.add(website) self.add(website)
self.description = f"A tool that can be used to semantic search a query from {website} website content." self.description = f"A tool that can be used to semantic search a query from {website} website content."
self.args_schema = FixedWebsiteSearchToolSchema self.args_schema = FixedWebsiteSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class WebsiteSearchTool(RagTool):
) -> Any: ) -> Any:
if "website" in kwargs: if "website" in kwargs:
self.add(kwargs["website"]) self.add(kwargs["website"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `website`). They
            are forwarded instead of being dropped, so code in this class
            that checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class XMLSearchTool(RagTool):
self.add(xml) self.add(xml)
self.description = f"A tool that can be used to semantic search a query the {xml} XML's content." self.description = f"A tool that can be used to semantic search a query the {xml} XML's content."
self.args_schema = FixedXMLSearchToolSchema self.args_schema = FixedXMLSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class XMLSearchTool(RagTool):
) -> Any: ) -> Any:
if "xml" in kwargs: if "xml" in kwargs:
self.add(kwargs["xml"]) self.add(kwargs["xml"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example `xml`). They are
            forwarded instead of being dropped, so code in this class that
            checks for them before the search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class YoutubeChannelSearchTool(RagTool):
self.add(youtube_channel_handle) self.add(youtube_channel_handle)
self.description = f"A tool that can be used to semantic search a query the {youtube_channel_handle} Youtube Channels content." self.description = f"A tool that can be used to semantic search a query the {youtube_channel_handle} Youtube Channels content."
self.args_schema = FixedYoutubeChannelSearchToolSchema self.args_schema = FixedYoutubeChannelSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -53,3 +54,10 @@ class YoutubeChannelSearchTool(RagTool):
) -> Any: ) -> Any:
if "youtube_channel_handle" in kwargs: if "youtube_channel_handle" in kwargs:
self.add(kwargs["youtube_channel_handle"]) self.add(kwargs["youtube_channel_handle"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example
            `youtube_channel_handle`). They are forwarded instead of being
            dropped, so code in this class that checks for them before the
            search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)

View File

@@ -34,6 +34,7 @@ class YoutubeVideoSearchTool(RagTool):
self.add(youtube_video_url) self.add(youtube_video_url)
self.description = f"A tool that can be used to semantic search a query the {youtube_video_url} Youtube Video content." self.description = f"A tool that can be used to semantic search a query the {youtube_video_url} Youtube Video content."
self.args_schema = FixedYoutubeVideoSearchToolSchema self.args_schema = FixedYoutubeVideoSearchToolSchema
self._generate_description()
def add( def add(
self, self,
@@ -50,3 +51,10 @@ class YoutubeVideoSearchTool(RagTool):
) -> Any: ) -> Any:
if "youtube_video_url" in kwargs: if "youtube_video_url" in kwargs:
self.add(kwargs["youtube_video_url"]) self.add(kwargs["youtube_video_url"])
def _run(
    self,
    search_query: str,
    **kwargs: Any,
) -> Any:
    """Run a semantic search for `search_query` through the parent tool.

    Args:
        search_query: Text to search for; handed to the parent `_run` as
            `query`.
        **kwargs: Extra call-time arguments (for example
            `youtube_video_url`). They are forwarded instead of being
            dropped, so code in this class that checks for them before the
            search can still see them.

    Returns:
        Whatever the parent class's `_run` returns.
    """
    # NOTE(review): assumes the parent `_run` accepts **kwargs and relays
    # them to the pre-run hook -- confirm against the base class.
    return super()._run(query=search_query, **kwargs)