mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-08 15:48:29 +00:00
Merge branch 'main' into feature/enhanced-serper-search
This commit is contained in:
10
README.md
10
README.md
@@ -13,7 +13,7 @@ In the realm of CrewAI agents, tools are pivotal for enhancing functionality. Th
|
||||
|
||||
<h3>
|
||||
|
||||
[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb)
|
||||
[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) | [Discourse](https://community.crewai.com/)
|
||||
|
||||
</h3>
|
||||
|
||||
@@ -51,7 +51,7 @@ There are three ways to create tools for crewAI agents:
|
||||
### Subclassing `BaseTool`
|
||||
|
||||
```python
|
||||
from crewai_tools import BaseTool
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
class MyCustomTool(BaseTool):
|
||||
name: str = "Name of my tool"
|
||||
@@ -70,7 +70,7 @@ Define a new class inheriting from `BaseTool`, specifying `name`, `description`,
|
||||
For a simpler approach, create a `Tool` object directly with the required attributes and a functional logic.
|
||||
|
||||
```python
|
||||
from crewai_tools import tool
|
||||
from crewai.tools import BaseTool
|
||||
@tool("Name of my tool")
|
||||
def my_tool(question: str) -> str:
|
||||
"""Clear description for what this tool is useful for, you agent will need this information to use it."""
|
||||
@@ -140,6 +140,4 @@ Thank you for your interest in enhancing the capabilities of AI agents through a
|
||||
|
||||
## Contact
|
||||
|
||||
For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb) or open an issue in this repository.
|
||||
|
||||
|
||||
For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb), [Discourse](https://community.crewai.com/) or open an issue in this repository.
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from .tools import (
|
||||
BraveSearchTool,
|
||||
BrowserbaseLoadTool,
|
||||
CodeDocsSearchTool,
|
||||
CodeInterpreterTool,
|
||||
@@ -19,6 +20,7 @@ from .tools import (
|
||||
LlamaIndexTool,
|
||||
MDXSearchTool,
|
||||
MultiOnTool,
|
||||
MySQLSearchTool,
|
||||
NL2SQLTool,
|
||||
PDFSearchTool,
|
||||
PGSearchTool,
|
||||
@@ -40,6 +42,5 @@ from .tools import (
|
||||
XMLSearchTool,
|
||||
YoutubeChannelSearchTool,
|
||||
YoutubeVideoSearchTool,
|
||||
MySQLSearchTool
|
||||
WeaviateVectorSearchTool,
|
||||
)
|
||||
from .tools.base_tool import BaseTool, Tool, tool
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from .brave_search_tool.brave_search_tool import BraveSearchTool
|
||||
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
|
||||
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
|
||||
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
|
||||
@@ -11,10 +12,10 @@ from .exa_tools.exa_search_tool import EXASearchTool
|
||||
from .file_read_tool.file_read_tool import FileReadTool
|
||||
from .file_writer_tool.file_writer_tool import FileWriterTool
|
||||
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
|
||||
FirecrawlCrawlWebsiteTool
|
||||
FirecrawlCrawlWebsiteTool,
|
||||
)
|
||||
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
|
||||
FirecrawlScrapeWebsiteTool
|
||||
FirecrawlScrapeWebsiteTool,
|
||||
)
|
||||
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
|
||||
from .github_search_tool.github_search_tool import GithubSearchTool
|
||||
@@ -22,16 +23,17 @@ from .json_search_tool.json_search_tool import JSONSearchTool
|
||||
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
|
||||
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
|
||||
from .multion_tool.multion_tool import MultiOnTool
|
||||
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
|
||||
from .nl2sql.nl2sql_tool import NL2SQLTool
|
||||
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
|
||||
from .pg_seach_tool.pg_search_tool import PGSearchTool
|
||||
from .rag.rag_tool import RagTool
|
||||
from .scrape_element_from_website.scrape_element_from_website import (
|
||||
ScrapeElementFromWebsiteTool
|
||||
ScrapeElementFromWebsiteTool,
|
||||
)
|
||||
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
|
||||
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
|
||||
ScrapflyScrapeWebsiteTool
|
||||
ScrapflyScrapeWebsiteTool,
|
||||
)
|
||||
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
|
||||
from .serper_dev_tool.serper_dev_tool import SerperDevTool
|
||||
@@ -46,7 +48,7 @@ from .vision_tool.vision_tool import VisionTool
|
||||
from .website_search.website_search_tool import WebsiteSearchTool
|
||||
from .xml_search_tool.xml_search_tool import XMLSearchTool
|
||||
from .youtube_channel_search_tool.youtube_channel_search_tool import (
|
||||
YoutubeChannelSearchTool
|
||||
YoutubeChannelSearchTool,
|
||||
)
|
||||
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
|
||||
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
|
||||
from .weaviate_tool.vector_search import WeaviateVectorSearchTool
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
from typing import Any, Callable

from pydantic import BaseModel as PydanticBaseModel

from crewai.tools.base_tool import BaseTool
from crewai.tools.structured_tool import CrewStructuredTool


class Tool(BaseTool):
    """A BaseTool whose behavior is delegated to a plain callable."""

    func: Callable
    """The function that will be executed when the tool is called."""

    def _run(self, *args: Any, **kwargs: Any) -> Any:
        # Forward the invocation untouched to the wrapped callable.
        return self.func(*args, **kwargs)


def to_langchain(
    tools: list[BaseTool | CrewStructuredTool],
) -> list[CrewStructuredTool]:
    """Convert BaseTool items to structured tools; pass other items through."""
    converted = []
    for item in tools:
        if isinstance(item, BaseTool):
            converted.append(item.to_structured_tool())
        else:
            converted.append(item)
    return converted


def tool(*args):
    """
    Decorator to create a tool from a function.
    """

    def _named_factory(tool_name: str) -> Callable:
        def _build(f: Callable) -> BaseTool:
            # The docstring is mandatory: it becomes the tool's description.
            if f.__doc__ is None:
                raise ValueError("Function must have a docstring")
            if f.__annotations__ is None:
                raise ValueError("Function must have type annotations")

            schema_name = "".join(tool_name.split()).title()
            # Build a pydantic model on the fly from the function's
            # parameter annotations; the return annotation is excluded.
            field_annotations = {
                key: value
                for key, value in f.__annotations__.items()
                if key != "return"
            }
            schema = type(
                schema_name,
                (PydanticBaseModel,),
                {"__annotations__": field_annotations},
            )

            return Tool(
                name=tool_name,
                description=f.__doc__,
                func=f,
                args_schema=schema,
            )

        return _build

    # Bare usage: @tool — the tool name is taken from the function name.
    if len(args) == 1 and callable(args[0]):
        return _named_factory(args[0].__name__)(args[0])
    # Named usage: @tool("Name of my tool") — return the actual decorator.
    if len(args) == 1 and isinstance(args[0], str):
        return _named_factory(args[0])
    raise ValueError("Invalid arguments")
|
||||
30
src/crewai_tools/tools/brave_search_tool/README.md
Normal file
30
src/crewai_tools/tools/brave_search_tool/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# BraveSearchTool Documentation
|
||||
|
||||
## Description
|
||||
This tool is designed to perform a web search for a specified query from a text's content across the internet. It utilizes the Brave Web Search API, which is a REST API to query Brave Search and get back search results from the web. The following sections describe how to curate requests, including parameters and headers, to Brave Web Search API and get a JSON response back.
|
||||
|
||||
## Installation
|
||||
To incorporate this tool into your project, follow the installation instructions below:
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
The following example demonstrates how to initialize the tool and execute a search with a given query:
|
||||
|
||||
```python
|
||||
from crewai_tools import BraveSearchTool
|
||||
|
||||
# Initialize the tool for internet searching capabilities
|
||||
tool = BraveSearchTool()
|
||||
```
|
||||
|
||||
## Steps to Get Started
|
||||
To effectively use the `BraveSearchTool`, follow these steps:
|
||||
|
||||
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
|
||||
2. **API Key Acquisition**: Acquire an API key [here](https://api.search.brave.com/app/keys).
|
||||
3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool.
|
||||
|
||||
## Conclusion
|
||||
By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.
|
||||
118
src/crewai_tools/tools/brave_search_tool/brave_search_tool.py
Normal file
118
src/crewai_tools/tools/brave_search_tool/brave_search_tool.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import os
import time
from datetime import datetime  # BUGFIX: was `import datetime`; `datetime.now()` then raises AttributeError
from typing import Any, ClassVar, Optional, Type

import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field


def _save_results_to_file(content: str) -> None:
    """Saves the search results to a timestamped text file in the current directory."""
    filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
    with open(filename, "w") as file:
        file.write(content)
    # BUGFIX: message previously did not interpolate the filename.
    print(f"Results saved to {filename}")


class BraveSearchToolSchema(BaseModel):
    """Input for BraveSearchTool."""

    search_query: str = Field(
        ..., description="Mandatory search query you want to use to search the internet"
    )


class BraveSearchTool(BaseTool):
    """
    BraveSearchTool - A tool for performing web searches using the Brave Search API.

    This module provides functionality to search the internet using Brave's Search API,
    supporting customizable result counts and country-specific searches.

    Dependencies:
        - requests
        - pydantic
        - python-dotenv (for API key management)
    """

    name: str = "Brave Web Search the internet"
    description: str = (
        "A tool that can be used to search the internet with a search_query."
    )
    args_schema: Type[BaseModel] = BraveSearchToolSchema
    search_url: str = "https://api.search.brave.com/res/v1/web/search"
    # Optional two-letter country code; empty string means "no country filter".
    country: Optional[str] = ""
    n_results: int = 10
    save_file: bool = False
    # Class-wide (shared across instances) throttle state for rate limiting.
    _last_request_time: ClassVar[float] = 0
    _min_request_interval: ClassVar[float] = 1.0  # seconds

    def __init__(self, *args, **kwargs):
        """Validate that the BRAVE_API_KEY environment variable is set."""
        super().__init__(*args, **kwargs)
        if "BRAVE_API_KEY" not in os.environ:
            raise ValueError(
                "BRAVE_API_KEY environment variable is required for BraveSearchTool"
            )

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Execute a Brave web search.

        Accepts `search_query` (or legacy `query`), plus optional `n_results`
        and `save_file` overrides. Returns formatted result text, or an error
        string on request/parsing failure.
        """
        # Enforce a minimum interval between requests across all instances.
        current_time = time.time()
        if (current_time - self._last_request_time) < self._min_request_interval:
            time.sleep(
                self._min_request_interval - (current_time - self._last_request_time)
            )
        BraveSearchTool._last_request_time = time.time()
        try:
            search_query = kwargs.get("search_query") or kwargs.get("query")
            if not search_query:
                raise ValueError("Search query is required")

            save_file = kwargs.get("save_file", self.save_file)
            n_results = kwargs.get("n_results", self.n_results)

            payload = {"q": search_query, "count": n_results}

            if self.country != "":
                payload["country"] = self.country

            headers = {
                "X-Subscription-Token": os.environ["BRAVE_API_KEY"],
                "Accept": "application/json",
            }

            response = requests.get(self.search_url, headers=headers, params=payload)
            response.raise_for_status()  # Handle non-200 responses
            results = response.json()

            # BUGFIX: `content` was unbound when the response had no "web"
            # key, raising NameError below; default to an empty result.
            content = ""
            if "web" in results:
                results = results["web"]["results"]
                formatted = []
                for result in results:
                    try:
                        formatted.append(
                            "\n".join(
                                [
                                    f"Title: {result['title']}",
                                    f"Link: {result['url']}",
                                    f"Snippet: {result['description']}",
                                    "---",
                                ]
                            )
                        )
                    except KeyError:
                        # Skip malformed entries missing expected fields.
                        continue

                content = "\n".join(formatted)
        except requests.RequestException as e:
            return f"Error performing search: {str(e)}"
        except KeyError as e:
            return f"Error parsing search results: {str(e)}"
        if save_file:
            _save_results_to_file(content)
            return f"\nSearch results: {content}\n"
        else:
            return content
|
||||
@@ -1,9 +1,8 @@
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class BrowserbaseLoadToolSchema(BaseModel):
|
||||
url: str = Field(description="Website URL")
|
||||
|
||||
@@ -3,10 +3,9 @@ import os
|
||||
from typing import List, Optional, Type
|
||||
|
||||
import docker
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class CodeInterpreterSchema(BaseModel):
|
||||
"""Input for CodeInterpreterTool."""
|
||||
|
||||
@@ -5,8 +5,7 @@ Composio tools wrapper.
|
||||
import typing as t
|
||||
|
||||
import typing_extensions as te
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
|
||||
class ComposioTool(BaseTool):
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import json
|
||||
from typing import Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class ImagePromptSchema(BaseModel):
|
||||
"""Input for Dall-E Tool."""
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import os
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..base_tool import BaseTool
|
||||
|
||||
|
||||
class FixedDirectoryReadToolSchema(BaseModel):
|
||||
"""Input for DirectoryReadTool."""
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import os
|
||||
from typing import Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class EXABaseToolToolSchema(BaseModel):
|
||||
"""Input for EXABaseTool."""
|
||||
|
||||
@@ -1,28 +1,30 @@
|
||||
import os
|
||||
import requests
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from .exa_base_tool import EXABaseTool
|
||||
|
||||
|
||||
class EXASearchTool(EXABaseTool):
|
||||
def _run(
|
||||
self,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
search_query = kwargs.get('search_query')
|
||||
if search_query is None:
|
||||
search_query = kwargs.get('query')
|
||||
def _run(
|
||||
self,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
search_query = kwargs.get("search_query")
|
||||
if search_query is None:
|
||||
search_query = kwargs.get("query")
|
||||
|
||||
payload = {
|
||||
"query": search_query,
|
||||
"type": "magic",
|
||||
}
|
||||
payload = {
|
||||
"query": search_query,
|
||||
"type": "magic",
|
||||
}
|
||||
|
||||
headers = self.headers.copy()
|
||||
headers["x-api-key"] = os.environ['EXA_API_KEY']
|
||||
headers = self.headers.copy()
|
||||
headers["x-api-key"] = os.environ["EXA_API_KEY"]
|
||||
|
||||
response = requests.post(self.search_url, json=payload, headers=headers)
|
||||
results = response.json()
|
||||
if 'results' in results:
|
||||
results = super()._parse_results(results['results'])
|
||||
return results
|
||||
response = requests.post(self.search_url, json=payload, headers=headers)
|
||||
results = response.json()
|
||||
if "results" in results:
|
||||
results = super()._parse_results(results["results"])
|
||||
return results
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..base_tool import BaseTool
|
||||
|
||||
|
||||
class FixedFileReadToolSchema(BaseModel):
|
||||
"""Input for FileReadTool."""
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
import os
|
||||
from typing import Any, Optional, Type
|
||||
from pydantic import BaseModel
|
||||
from ..base_tool import BaseTool
|
||||
from distutils.util import strtobool
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class FileWriterToolInput(BaseModel):
|
||||
filename: str
|
||||
filename: str
|
||||
directory: Optional[str] = "./"
|
||||
overwrite: str = "False"
|
||||
content: str
|
||||
|
||||
|
||||
|
||||
class FileWriterTool(BaseTool):
|
||||
name: str = "File Writer Tool"
|
||||
description: str = (
|
||||
@@ -26,7 +28,7 @@ class FileWriterTool(BaseTool):
|
||||
|
||||
# Construct the full path
|
||||
filepath = os.path.join(kwargs.get("directory") or "", kwargs["filename"])
|
||||
|
||||
|
||||
# Convert overwrite to boolean
|
||||
kwargs["overwrite"] = bool(strtobool(kwargs["overwrite"]))
|
||||
|
||||
@@ -46,4 +48,4 @@ class FileWriterTool(BaseTool):
|
||||
except KeyError as e:
|
||||
return f"An error occurred while accessing key: {str(e)}"
|
||||
except Exception as e:
|
||||
return f"An error occurred while writing to the file: {str(e)}"
|
||||
return f"An error occurred while writing to the file: {str(e)}"
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
# Type checking import
|
||||
if TYPE_CHECKING:
|
||||
@@ -20,6 +19,9 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True, validate_assignment=True, frozen=False
|
||||
)
|
||||
name: str = "Firecrawl web crawl tool"
|
||||
description: str = "Crawl webpages using Firecrawl and return the contents"
|
||||
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
|
||||
@@ -50,3 +52,15 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
|
||||
options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
|
||||
return self.firecrawl.crawl_url(url, options)
|
||||
|
||||
|
||||
try:
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
# Must rebuild model after class is defined
|
||||
FirecrawlCrawlWebsiteTool.model_rebuild()
|
||||
except ImportError:
|
||||
"""
|
||||
When this tool is not used, then exception can be ignored.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
# Type checking import
|
||||
if TYPE_CHECKING:
|
||||
@@ -24,6 +23,9 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel):
|
||||
|
||||
|
||||
class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True, validate_assignment=True, frozen=False
|
||||
)
|
||||
name: str = "Firecrawl web scrape tool"
|
||||
description: str = "Scrape webpages url using Firecrawl and return the contents"
|
||||
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
|
||||
@@ -61,3 +63,15 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
"timeout": timeout,
|
||||
}
|
||||
return self.firecrawl.scrape_url(url, options)
|
||||
|
||||
|
||||
try:
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
# Must rebuild model after class is defined
|
||||
FirecrawlScrapeWebsiteTool.model_rebuild()
|
||||
except ImportError:
|
||||
"""
|
||||
When this tool is not used, then exception can be ignored.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
# Type checking import
|
||||
if TYPE_CHECKING:
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
38
src/crewai_tools/tools/jina_scrape_website_tool/README.md
Normal file
38
src/crewai_tools/tools/jina_scrape_website_tool/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# JinaScrapeWebsiteTool
|
||||
|
||||
## Description
|
||||
A tool designed to extract and read the content of a specified website by using Jina.ai reader. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package
|
||||
```shell
|
||||
pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
```python
|
||||
from crewai_tools import JinaScrapeWebsiteTool
|
||||
|
||||
# To enable scraping any website it finds during its execution
|
||||
tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY')
|
||||
|
||||
# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
|
||||
tool = JinaScrapeWebsiteTool(website_url='https://www.example.com')
|
||||
|
||||
# With custom headers
|
||||
tool = JinaScrapeWebsiteTool(
|
||||
website_url='https://www.example.com',
|
||||
custom_headers={'X-Target-Selector': 'body, .class, #id'}
|
||||
)
|
||||
```
|
||||
|
||||
## Authentication
|
||||
The tool uses Jina.ai's reader service. While it can work without an API key, Jina.ai may apply rate limiting or blocking to unauthenticated requests. For production use, it's recommended to provide an API key.
|
||||
|
||||
## Arguments
|
||||
- `website_url`: Mandatory website URL to read the file. This is the primary input for the tool, specifying which website's content should be scraped and read.
|
||||
- `api_key`: Optional Jina.ai API key for authenticated access to the reader service.
|
||||
- `custom_headers`: Optional dictionary of HTTP headers to use when making requests.
|
||||
|
||||
## Note
|
||||
This tool is an alternative to the standard `ScrapeWebsiteTool` that specifically uses Jina.ai's reader service for enhanced content extraction. Choose this tool when you need more sophisticated content parsing capabilities.
|
||||
@@ -0,0 +1,54 @@
|
||||
from typing import Optional, Type

import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field


class JinaScrapeWebsiteToolInput(BaseModel):
    """Input schema for JinaScrapeWebsiteTool."""

    website_url: str = Field(..., description="Mandatory website url to read the file")


class JinaScrapeWebsiteTool(BaseTool):
    """Read a website through the Jina.ai reader and return its markdown content."""

    name: str = "JinaScrapeWebsiteTool"
    description: str = (
        "A tool that can be used to read a website content using Jina.ai reader and return markdown content."
    )
    args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput
    # Optional fixed target URL; when set, the tool only scrapes this site.
    website_url: Optional[str] = None
    api_key: Optional[str] = None
    headers: dict = {}

    def __init__(
        self,
        website_url: Optional[str] = None,
        api_key: Optional[str] = None,
        custom_headers: Optional[dict] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Pinning a URL at construction time narrows the tool to that site
        # and rewrites the tool description to match.
        if website_url is not None:
            self.website_url = website_url
            self.description = f"A tool that can be used to read {website_url}'s content and return markdown content."
            self._generate_description()

        if custom_headers is not None:
            self.headers = custom_headers

        # The API key is sent as a bearer token; without it Jina.ai may
        # rate-limit or block requests.
        if api_key is not None:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def _run(self, website_url: Optional[str] = None) -> str:
        """Fetch the target URL via the Jina.ai reader and return the body text."""
        target = website_url or self.website_url
        if not target:
            raise ValueError(
                "Website URL must be provided either during initialization or execution"
            )

        reader_response = requests.get(
            f"https://r.jina.ai/{target}", headers=self.headers, timeout=15
        )
        reader_response.raise_for_status()
        return reader_response.text
|
||||
@@ -1,9 +1,8 @@
|
||||
from typing import Any, Optional, Type, cast
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class LlamaIndexTool(BaseTool):
|
||||
"""Tool to wrap LlamaIndex tools/query engines."""
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
|
||||
class MultiOnTool(BaseTool):
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
from typing import Any, Union
|
||||
from typing import Any, Type, Union
|
||||
|
||||
from ..base_tool import BaseTool
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from typing import Type, Any
|
||||
|
||||
class NL2SQLToolInput(BaseModel):
|
||||
sql_query: str = Field(
|
||||
@@ -13,6 +12,7 @@ class NL2SQLToolInput(BaseModel):
|
||||
description="The SQL query to execute.",
|
||||
)
|
||||
|
||||
|
||||
class NL2SQLTool(BaseTool):
|
||||
name: str = "NL2SQLTool"
|
||||
description: str = "Converts natural language to SQL queries and executes them."
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class Adapter(BaseModel, ABC):
|
||||
class Config:
|
||||
|
||||
@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..base_tool import BaseTool
|
||||
|
||||
|
||||
class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
|
||||
"""Input for ScrapeElementFromWebsiteTool."""
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..base_tool import BaseTool
|
||||
|
||||
|
||||
class FixedScrapeWebsiteToolSchema(BaseModel):
|
||||
"""Input for ScrapeWebsiteTool."""
|
||||
@@ -67,7 +67,7 @@ class ScrapeWebsiteTool(BaseTool):
|
||||
page.encoding = page.apparent_encoding
|
||||
parsed = BeautifulSoup(page.text, "html.parser")
|
||||
|
||||
text = parsed.get_text()
|
||||
text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
|
||||
text = " ".join([i for i in text.split(" ") if i.strip() != ""])
|
||||
text = parsed.get_text(" ")
|
||||
text = re.sub("[ \t]+", " ", text)
|
||||
text = re.sub("\\s+\n\\s+", "\n", text)
|
||||
return text
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import logging
|
||||
from typing import Any, Dict, Literal, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
logger = logging.getLogger(__file__)
|
||||
|
||||
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import time
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
from ..base_tool import BaseTool
|
||||
|
||||
|
||||
class FixedSeleniumScrapingToolSchema(BaseModel):
|
||||
"""Input for SeleniumScrapingTool."""
|
||||
|
||||
@@ -5,16 +5,15 @@ import logging
|
||||
from typing import Any, Type
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _save_results_to_file(content: str) -> None:
|
||||
"""Saves the search results to a file."""
|
||||
try:
|
||||
|
||||
@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class SerplyNewsSearchToolSchema(BaseModel):
|
||||
"""Input for Serply News Search."""
|
||||
|
||||
@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class SerplyScholarSearchToolSchema(BaseModel):
|
||||
"""Input for Serply Scholar Search."""
|
||||
|
||||
@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class SerplyWebSearchToolSchema(BaseModel):
|
||||
"""Input for Serply Web Search."""
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
from typing import Any, Dict, Literal, Optional, Type
|
||||
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class SpiderToolSchema(BaseModel):
|
||||
url: str = Field(description="Website URL")
|
||||
|
||||
@@ -2,11 +2,10 @@ import base64
|
||||
from typing import Type
|
||||
|
||||
import requests
|
||||
from crewai.tools import BaseTool
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from crewai_tools.tools.base_tool import BaseTool
|
||||
|
||||
|
||||
class ImagePromptSchema(BaseModel):
|
||||
"""Input for Vision Tool."""
|
||||
|
||||
80
src/crewai_tools/tools/weaviate_tool/README.md
Normal file
80
src/crewai_tools/tools/weaviate_tool/README.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# WeaviateVectorSearchTool
|
||||
|
||||
## Description
|
||||
This tool is specifically crafted for conducting semantic searches across documents stored in a Weaviate vector database. Use this tool to find documents that are semantically similar to a given query.
|
||||
|
||||
Weaviate is a vector database that is used to store and query vector embeddings. You can follow their docs here: https://weaviate.io/developers/wcs/connect
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package by executing the following command in your terminal:
|
||||
|
||||
```shell
|
||||
uv pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
To utilize the WeaviateVectorSearchTool for different use cases, follow these examples:
|
||||
|
||||
```python
|
||||
from crewai_tools import WeaviateVectorSearchTool
|
||||
|
||||
# Initialize the tool to search within an existing Weaviate collection
|
||||
tool = WeaviateVectorSearchTool(
|
||||
collection_name='example_collections',
|
||||
limit=3,
|
||||
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
|
||||
weaviate_api_key="your-weaviate-api-key",
|
||||
)
|
||||
|
||||
# or
|
||||
|
||||
# Setup custom model for vectorizer and generative model
|
||||
tool = WeaviateVectorSearchTool(
|
||||
collection_name='example_collections',
|
||||
limit=3,
|
||||
vectorizer=Configure.Vectorizer.text2vec_openai(model="nomic-embed-text"),
|
||||
generative_model=Configure.Generative.openai(model="gpt-4o-mini"),
|
||||
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
|
||||
weaviate_api_key="your-weaviate-api-key",
|
||||
)
|
||||
|
||||
# Adding the tool to an agent
|
||||
rag_agent = Agent(
|
||||
name="rag_agent",
|
||||
role="You are a helpful assistant that can answer questions with the help of the WeaviateVectorSearchTool.",
|
||||
llm="gpt-4o-mini",
|
||||
tools=[tool],
|
||||
)
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `collection_name` : The name of the collection to search within. (Required)
|
||||
- `weaviate_cluster_url` : The URL of the Weaviate cluster. (Required)
|
||||
- `weaviate_api_key` : The API key for the Weaviate cluster. (Required)
|
||||
- `limit` : The number of results to return. (Optional)
|
||||
- `vectorizer` : The vectorizer to use. (Optional)
|
||||
- `generative_model` : The generative model to use. (Optional)
|
||||
|
||||
Preloading the Weaviate database with documents:
|
||||
|
||||
```python
|
||||
from crewai_tools import WeaviateVectorSearchTool
|
||||
|
||||
# Use before hooks to generate the documents and add them to the Weaviate database. Follow the weaviate docs: https://weaviate.io/developers/wcs/connect
|
||||
test_docs = client.collections.get("example_collections")
|
||||
|
||||
|
||||
docs_to_load = os.listdir("knowledge")
|
||||
with test_docs.batch.dynamic() as batch:
|
||||
for d in docs_to_load:
|
||||
with open(os.path.join("knowledge", d), "r") as f:
|
||||
content = f.read()
|
||||
batch.add_object(
|
||||
{
|
||||
"content": content,
|
||||
"year": d.split("_")[0],
|
||||
}
|
||||
)
|
||||
tool = WeaviateVectorSearchTool(collection_name='example_collections', limit=3)
|
||||
|
||||
```
|
||||
89
src/crewai_tools/tools/weaviate_tool/vector_search.py
Normal file
89
src/crewai_tools/tools/weaviate_tool/vector_search.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import os
|
||||
import json
|
||||
import weaviate
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Type, Optional
|
||||
from crewai.tools import BaseTool
|
||||
|
||||
from weaviate.classes.config import Configure, Vectorizers
|
||||
from weaviate.classes.init import Auth
|
||||
|
||||
|
||||
class WeaviateToolSchema(BaseModel):
    """Input schema for WeaviateVectorSearchTool.

    A single free-text ``query`` string that is passed to Weaviate's
    ``near_text`` semantic search.
    """

    query: str = Field(
        ...,
        description="The query to search retrieve relevant information from the Weaviate database. Pass only the query, not the question.",
    )
|
||||
|
||||
|
||||
class WeaviateVectorSearchTool(BaseTool):
    """Tool to semantically search documents stored in a Weaviate cluster.

    Connects to a Weaviate Cloud instance, ensures the target collection
    exists (creating it with the configured vectorizer/generative model if
    not), runs a ``near_text`` query, and returns the matched objects'
    properties serialized as JSON.
    """

    name: str = "WeaviateVectorSearchTool"
    description: str = "A tool to search the Weaviate database for relevant information on internal documents."
    args_schema: Type[BaseModel] = WeaviateToolSchema
    query: Optional[str] = None

    # Vectorizer used only when the collection must be created on first use.
    vectorizer: Optional[Vectorizers] = Field(
        default=Configure.Vectorizer.text2vec_openai(
            model="nomic-embed-text",
        )
    )
    # NOTE(review): annotated Optional[str] but the default is a generative
    # config object from Configure.Generative; pydantic does not validate
    # defaults, so this passes — confirm the intended annotation.
    generative_model: Optional[str] = Field(
        default=Configure.Generative.openai(
            model="gpt-4o",
        ),
    )
    collection_name: Optional[str] = None
    # Maximum number of objects returned per query.
    limit: Optional[int] = Field(default=3)
    # Use .get() so importing this module does not raise KeyError when
    # OPENAI_API_KEY is unset; the key is only needed at query time.
    headers: Optional[dict] = Field(
        default={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY", "")}
    )
    weaviate_cluster_url: str = Field(
        ...,
        description="The URL of the Weaviate cluster",
    )
    weaviate_api_key: str = Field(
        ...,
        description="The API key for the Weaviate cluster",
    )

    def _run(self, query: str) -> str:
        """Search the Weaviate database.

        Args:
            query (str): The query to search retrieve relevant information
                from the Weaviate database. Pass only the query as a string,
                not the question.

        Returns:
            str: The matched objects' properties, each dumped as indented
                JSON and concatenated.

        Raises:
            ValueError: If the cluster URL or API key is empty.
        """
        if not self.weaviate_cluster_url or not self.weaviate_api_key:
            raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set")

        client = weaviate.connect_to_weaviate_cloud(
            cluster_url=self.weaviate_cluster_url,
            auth_credentials=Auth.api_key(self.weaviate_api_key),
            headers=self.headers,
        )
        try:
            # collections.get() returns a lazy reference that is always
            # truthy, so the original `if not internal_docs:` branch never
            # ran; check existence explicitly before creating the collection.
            if not client.collections.exists(self.collection_name):
                client.collections.create(
                    name=self.collection_name,
                    vectorizer_config=self.vectorizer,
                    generative_config=self.generative_model,
                )
            internal_docs = client.collections.get(self.collection_name)

            response = internal_docs.query.near_text(
                query=query,
                limit=self.limit,
            )
            json_response = ""
            for obj in response.objects:
                json_response += json.dumps(obj.properties, indent=2)
            return json_response
        finally:
            # Always release the connection, even when the query raises.
            client.close()
|
||||
50
tests/tools/brave_search_tool_test.py
Normal file
50
tests/tools/brave_search_tool_test.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
|
||||
|
||||
|
||||
@pytest.fixture
def brave_tool():
    """Provide a BraveSearchTool capped at two results for the tests below."""
    tool = BraveSearchTool(n_results=2)
    return tool
|
||||
|
||||
|
||||
def test_brave_tool_initialization():
    """A freshly constructed tool uses the documented defaults."""
    default_tool = BraveSearchTool()
    # Default behavior: 10 results, nothing written to disk.
    assert default_tool.n_results == 10
    assert default_tool.save_file is False
|
||||
|
||||
|
||||
@patch("requests.get")
def test_brave_tool_search(mock_get, brave_tool):
    """The tool surfaces title and URL from a (mocked) Brave API payload."""
    fake_payload = {
        "web": {
            "results": [
                {
                    "title": "Test Title",
                    "url": "http://test.com",
                    "description": "Test Description",
                }
            ]
        }
    }
    # requests.get(...).json() returns our canned payload — no network.
    mock_get.return_value.json.return_value = fake_payload

    output = brave_tool.run(search_query="test")

    assert "Test Title" in output
    assert "http://test.com" in output
|
||||
|
||||
|
||||
def test_brave_tool():
    # NOTE(review): this performs a live Brave API call (no mocking) and has
    # no assertions — it reads as a manual smoke test, but pytest will still
    # collect and run it; consider gating it behind an env var or a skip
    # marker so CI does not depend on network access.
    tool = BraveSearchTool(
        n_results=2,
    )
    x = tool.run(search_query="ChatGPT")
    print(x)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual entry point: run the smoke tests directly without pytest.
    test_brave_tool()
    test_brave_tool_initialization()
    # test_brave_tool_search(brave_tool)
|
||||
Reference in New Issue
Block a user