Merge branch 'main' into jeroen-vet-patch-1

This commit is contained in:
João Moura
2024-05-02 02:26:48 -03:00
committed by GitHub
23 changed files with 232 additions and 42 deletions

View File

@@ -1,5 +1,6 @@
from .tools.base_tool import BaseTool, Tool, tool
from .tools import (
BrowserbaseLoadTool,
CodeDocsSearchTool,
CSVSearchTool,
DirectorySearchTool,

View File

@@ -1,3 +1,4 @@
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .csv_search_tool.csv_search_tool import CSVSearchTool
from .directory_search_tool.directory_search_tool import DirectorySearchTool
@@ -18,4 +19,4 @@ from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool

View File

@@ -20,7 +20,7 @@ class BaseTool(BaseModel, ABC):
"""The schema for the arguments that the tool accepts."""
description_updated: bool = False
"""Flag to check if the description has been updated."""
cache_function: Optional[Callable] = lambda: True
cache_function: Optional[Callable] = lambda _args, _result: True
"""Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""
@validator("args_schema", always=True, pre=True)

View File

@@ -0,0 +1,29 @@
# BrowserbaseLoadTool
## Description
[Browserbase](https://browserbase.com) is a serverless platform for running headless browsers. It offers advanced debugging, session recordings, stealth mode, integrated proxies, and captcha solving.
## Installation
- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`).
- Install the [Browserbase SDK](http://github.com/browserbase/python-sdk) along with `crewai[tools]` package:
```
pip install browserbase 'crewai[tools]'
```
## Example
Utilize the BrowserbaseLoadTool as follows to allow your agent to load websites:
```python
from crewai_tools import BrowserbaseLoadTool
tool = BrowserbaseLoadTool()
```
## Arguments
- `api_key`: Optional. Specifies the Browserbase API key. Default is the `BROWSERBASE_API_KEY` environment variable.
- `text_content`: Optional. Load pages as readable text. Default is `False`.

View File

@@ -0,0 +1,29 @@
from typing import Any, Optional, Type

from pydantic.v1 import BaseModel, Field

from crewai_tools import BaseTool
# Argument schema for BrowserbaseLoadTool: the single input is the URL of the
# page to load. Documented with comments (not a docstring) so the generated
# pydantic schema stays exactly as it was.
class BrowserbaseLoadToolSchema(BaseModel):
    url: str = Field(
        description="Website URL",
    )
class BrowserbaseLoadTool(BaseTool):
    """Load a web page through Browserbase and return what the SDK yields.

    The `browserbase` package is imported lazily in ``__init__`` so that the
    rest of the package stays importable when the optional dependency is not
    installed.
    """

    name: str = "Browserbase web load tool"
    description: str = "Load webpages in a headless browser using Browserbase and return the contents"
    args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
    # Key handed to the Browserbase client; the tool's README states that the
    # `BROWSERBASE_API_KEY` environment variable is used when this is None.
    api_key: Optional[str] = None
    # Forwarded to `load_url` as its `text_content` argument.
    text_content: Optional[bool] = False
    # Browserbase client instance, created in __init__.
    browserbase: Optional[Any] = None

    def __init__(
        self,
        api_key: Optional[str] = None,
        text_content: Optional[bool] = False,
        **kwargs,
    ):
        """Create the tool and its Browserbase client.

        Args:
            api_key: Browserbase API key passed to the client constructor.
            text_content: Value forwarded to ``load_url`` on every run.
            **kwargs: Forwarded unchanged to ``BaseTool.__init__``.

        Raises:
            ImportError: If the `browserbase` package is not installed.
        """
        super().__init__(**kwargs)
        try:
            from browserbase import Browserbase
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "`browserbase` package not found, please run `pip install browserbase`"
            ) from exc

        # Keep the declared field in sync with the argument instead of leaving
        # it at its class default.
        self.api_key = api_key
        self.text_content = text_content
        self.browserbase = Browserbase(api_key=api_key)

    def _run(self, url: str):
        """Load ``url`` with the Browserbase client and return its result."""
        return self.browserbase.load_url(url, text_content=self.text_content)

View File

@@ -34,6 +34,7 @@ class CodeDocsSearchTool(RagTool):
self.add(docs_url)
self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content."
self.args_schema = FixedCodeDocsSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class CodeDocsSearchTool(RagTool):
) -> Any:
if "docs_url" in kwargs:
self.add(kwargs["docs_url"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class CSVSearchTool(RagTool):
self.add(csv)
self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content."
self.args_schema = FixedCSVSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class CSVSearchTool(RagTool):
) -> Any:
if "csv" in kwargs:
self.add(kwargs["csv"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class DirectorySearchTool(RagTool):
self.add(directory)
self.description = f"A tool that can be used to semantic search a query the {directory} directory's content."
self.args_schema = FixedDirectorySearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class DirectorySearchTool(RagTool):
) -> Any:
if "directory" in kwargs:
self.add(kwargs["directory"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class DOCXSearchTool(RagTool):
self.add(docx)
self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content."
self.args_schema = FixedDOCXSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class DOCXSearchTool(RagTool):
) -> Any:
if "docx" in kwargs:
self.add(kwargs["docx"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -2,32 +2,45 @@ from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from ..base_tool import BaseTool
class FixedFileReadToolSchema(BaseModel):
"""Input for FileReadTool."""
pass
"""Input for FileReadTool."""
pass
class FileReadToolSchema(FixedFileReadToolSchema):
"""Input for FileReadTool."""
file_path: str = Field(..., description="Mandatory file full path to read the file")
"""Input for FileReadTool."""
file_path: str = Field(
...,
description="Mandatory file full path to read the file"
)
class FileReadTool(BaseTool):
name: str = "Read a file's content"
description: str = "A tool that can be used to read a file's content."
args_schema: Type[BaseModel] = FileReadToolSchema
file_path: Optional[str] = None
name: str = "Read a file's content"
description: str = "A tool that can be used to read a file's content."
args_schema: Type[BaseModel] = FileReadToolSchema
file_path: Optional[str] = None
def __init__(self, file_path: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if file_path is not None:
self.file_path = file_path
self.description = f"A tool that can be used to read {file_path}'s content."
self.args_schema = FixedFileReadToolSchema
self._generate_description()
def __init__(
self,
file_path: Optional[str] = None,
**kwargs
):
super().__init__(**kwargs)
if file_path is not None:
self.file_path = file_path
self.description = f"A tool that can be used to read {file_path}'s content."
self.args_schema = FixedFileReadToolSchema
self._generate_description()
def _run(
self,
**kwargs: Any,
) -> Any:
file_path = kwargs.get('file_path', self.file_path)
with open(file_path, 'r') as file:
return file.read()
def _run(
self,
**kwargs: Any,
) -> Any:
try:
file_path = kwargs.get('file_path', self.file_path)
with open(file_path, 'r') as file:
return file.read()
except Exception as e:
return f"Fail to read the file {file_path}. Error: {e}"

View File

@@ -1,24 +1,25 @@
# GitHubSearchTool
# GithubSearchTool
## Description
The GitHubSearchTool is a Read, Append, and Generate (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.
The GithubSearchTool is a Retrieval-Augmented Generation (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.
## Installation
To use the GitHubSearchTool, first ensure the crewai_tools package is installed in your Python environment:
To use the GithubSearchTool, first ensure the crewai_tools package is installed in your Python environment:
```shell
pip install 'crewai[tools]'
```
This command installs the necessary package to run the GitHubSearchTool along with any other tools included in the crewai_tools package.
This command installs the necessary package to run the GithubSearchTool along with any other tools included in the crewai_tools package.
## Example
Heres how you can use the GitHubSearchTool to perform semantic searches within a GitHub repository:
Here's how you can use the GithubSearchTool to perform semantic searches within a GitHub repository:
```python
from crewai_tools import GitHubSearchTool
from crewai_tools import GithubSearchTool
# Initialize the tool for semantic searches within a specific GitHub repository
tool = GitHubSearchTool(
tool = GithubSearchTool(
gh_token='...',
github_repo='https://github.com/example/repo',
content_types=['code', 'issue'] # Options: code, repo, pr, issue
)
@@ -26,12 +27,14 @@ tool = GitHubSearchTool(
# OR
# Initialize the tool without a specific GitHub repository, so the agent can search any repository it learns about during its execution
tool = GitHubSearchTool(
tool = GithubSearchTool(
gh_token='...',
content_types=['code', 'issue'] # Options: code, repo, pr, issue
)
```
## Arguments
- `gh_token` : The GitHub token used to authenticate the search. This is a mandatory field and allows the tool to access the GitHub API for conducting searches.
- `github_repo` : The URL of the GitHub repository where the search will be conducted. This is a mandatory field and specifies the target repository for your search.
- `content_types` : Specifies the types of content to include in your search. You must provide a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is mandatory and allows tailoring the search to specific content types within the GitHub repository.
@@ -40,7 +43,7 @@ tool = GitHubSearchTool(
By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
```python
tool = GitHubSearchTool(
tool = GithubSearchTool(
config=dict(
llm=dict(
provider="ollama", # or google, openai, anthropic, llama2, ...

View File

@@ -21,13 +21,13 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema):
github_repo: str = Field(..., description="Mandatory github you want to search")
content_types: List[str] = Field(
...,
description="Mandatory content types you want to be inlcuded search, options: [code, repo, pr, issue]",
description="Mandatory content types you want to be included search, options: [code, repo, pr, issue]",
)
class GithubSearchTool(RagTool):
name: str = "Search a github repo's content"
description: str = "A tool that can be used to semantic search a query from a github repo's content."
description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
summarize: bool = False
gh_token: str
args_schema: Type[BaseModel] = GithubSearchToolSchema
@@ -36,18 +36,22 @@ class GithubSearchTool(RagTool):
def __init__(self, github_repo: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if github_repo is not None:
self.add(github_repo)
self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content."
self.add(repo=github_repo)
self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
self.args_schema = FixedGithubSearchToolSchema
self._generate_description()
def add(
self,
*args: Any,
repo: str,
content_types: List[str] | None = None,
**kwargs: Any,
) -> None:
content_types = content_types or self.content_types
kwargs["data_type"] = "github"
kwargs["loader"] = GithubLoader(config={"token": self.gh_token})
super().add(*args, **kwargs)
super().add(f"repo:{repo} type:{','.join(content_types)}", **kwargs)
def _before_run(
self,
@@ -55,4 +59,13 @@ class GithubSearchTool(RagTool):
**kwargs: Any,
) -> Any:
if "github_repo" in kwargs:
self.add(kwargs["github_repo"])
self.add(
repo=kwargs["github_repo"], content_types=kwargs.get("content_types")
)
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class JSONSearchTool(RagTool):
self.add(json_path)
self.description = f"A tool that can be used to semantic search a query the {json_path} JSON's content."
self.args_schema = FixedJSONSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class JSONSearchTool(RagTool):
) -> Any:
if "json_path" in kwargs:
self.add(kwargs["json_path"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class MDXSearchTool(RagTool):
self.add(mdx)
self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content."
self.args_schema = FixedMDXSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class MDXSearchTool(RagTool):
) -> Any:
if "mdx" in kwargs:
self.add(kwargs["mdx"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -33,6 +33,7 @@ class PDFSearchTool(RagTool):
self.add(pdf)
self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content."
self.args_schema = FixedPDFSearchToolSchema
self._generate_description()
def add(
self,

View File

@@ -35,3 +35,10 @@ class PGSearchTool(RagTool):
kwargs["data_type"] = "postgres"
kwargs["loader"] = PostgresLoader(config=dict(url=self.db_uri))
super().add(f"SELECT * FROM {table_name};", **kwargs)
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -44,7 +44,12 @@ class ScrapeWebsiteTool(BaseTool):
**kwargs: Any,
) -> Any:
website_url = kwargs.get('website_url', self.website_url)
page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {})
page = requests.get(
website_url,
timeout=15,
headers=self.headers,
cookies=self.cookies if self.cookies else {}
)
parsed = BeautifulSoup(page.content, "html.parser")
text = parsed.get_text()
text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])

View File

@@ -7,7 +7,7 @@ from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerperDevToolSchema(BaseModel):
"""Input for TXTSearchTool."""
"""Input for SerperDevTool."""
search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
class SerperDevTool(BaseTool):

View File

@@ -34,6 +34,7 @@ class TXTSearchTool(RagTool):
self.add(txt)
self.description = f"A tool that can be used to semantic search a query the {txt} txt's content."
self.args_schema = FixedTXTSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class TXTSearchTool(RagTool):
) -> Any:
if "txt" in kwargs:
self.add(kwargs["txt"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class WebsiteSearchTool(RagTool):
self.add(website)
self.description = f"A tool that can be used to semantic search a query from {website} website content."
self.args_schema = FixedWebsiteSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class WebsiteSearchTool(RagTool):
) -> Any:
if "website" in kwargs:
self.add(kwargs["website"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class XMLSearchTool(RagTool):
self.add(xml)
self.description = f"A tool that can be used to semantic search a query the {xml} XML's content."
self.args_schema = FixedXMLSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class XMLSearchTool(RagTool):
) -> Any:
if "xml" in kwargs:
self.add(kwargs["xml"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class YoutubeChannelSearchTool(RagTool):
self.add(youtube_channel_handle)
self.description = f"A tool that can be used to semantic search a query the {youtube_channel_handle} Youtube Channels content."
self.args_schema = FixedYoutubeChannelSearchToolSchema
self._generate_description()
def add(
self,
@@ -53,3 +54,10 @@ class YoutubeChannelSearchTool(RagTool):
) -> Any:
if "youtube_channel_handle" in kwargs:
self.add(kwargs["youtube_channel_handle"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result

View File

@@ -34,6 +34,7 @@ class YoutubeVideoSearchTool(RagTool):
self.add(youtube_video_url)
self.description = f"A tool that can be used to semantic search a query the {youtube_video_url} Youtube Video content."
self.args_schema = FixedYoutubeVideoSearchToolSchema
self._generate_description()
def add(
self,
@@ -50,3 +51,10 @@ class YoutubeVideoSearchTool(RagTool):
) -> Any:
if "youtube_video_url" in kwargs:
self.add(kwargs["youtube_video_url"])
def _run(self, search_query: str, **kwargs: Any) -> Any:
    """Forward ``search_query`` to the parent class's ``_run`` as ``query``.

    Args:
        search_query: Query text handed to the parent implementation.
        **kwargs: Accepted but not passed on to the parent call.

    Returns:
        Whatever the parent ``_run`` returns.
    """
    # NOTE(review): kwargs are dropped here — confirm the parent `_run`
    # does not rely on them.
    result = super()._run(query=search_query)
    return result