Merge branch 'main' into feature/enhanced-serper-search

Rip&Tear
2024-12-16 22:53:55 +08:00
committed by GitHub
38 changed files with 566 additions and 154 deletions

View File

@@ -13,7 +13,7 @@ In the realm of CrewAI agents, tools are pivotal for enhancing functionality. Th
<h3>
[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb)
[Homepage](https://www.crewai.io/) | [Documentation](https://docs.crewai.com/) | [Chat with Docs](https://chatg.pt/DWjSBZn) | [Examples](https://github.com/crewAIInc/crewAI-examples) | [Discord](https://discord.com/invite/X4JWnZnxPb) | [Discourse](https://community.crewai.com/)
</h3>
@@ -51,7 +51,7 @@ There are three ways to create tools for crewAI agents:
### Subclassing `BaseTool`
```python
from crewai_tools import BaseTool
from crewai.tools import BaseTool
class MyCustomTool(BaseTool):
name: str = "Name of my tool"
@@ -70,7 +70,7 @@ Define a new class inheriting from `BaseTool`, specifying `name`, `description`,
For a simpler approach, create a `Tool` object directly with the required attributes and functional logic.
```python
from crewai_tools import tool
from crewai.tools import tool
@tool("Name of my tool")
def my_tool(question: str) -> str:
"""Clear description for what this tool is useful for, you agent will need this information to use it."""
@@ -140,6 +140,4 @@ Thank you for your interest in enhancing the capabilities of AI agents through a
## Contact
For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb) or open an issue in this repository.
For questions or support, please join our [Discord community](https://discord.com/invite/X4JWnZnxPb), [Discourse](https://community.crewai.com/), or open an issue in this repository.

View File

@@ -1,4 +1,5 @@
from .tools import (
BraveSearchTool,
BrowserbaseLoadTool,
CodeDocsSearchTool,
CodeInterpreterTool,
@@ -19,6 +20,7 @@ from .tools import (
LlamaIndexTool,
MDXSearchTool,
MultiOnTool,
MySQLSearchTool,
NL2SQLTool,
PDFSearchTool,
PGSearchTool,
@@ -40,6 +42,5 @@ from .tools import (
XMLSearchTool,
YoutubeChannelSearchTool,
YoutubeVideoSearchTool,
MySQLSearchTool
WeaviateVectorSearchTool,
)
from .tools.base_tool import BaseTool, Tool, tool

View File

@@ -1,3 +1,4 @@
from .brave_search_tool.brave_search_tool import BraveSearchTool
from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
from .code_interpreter_tool.code_interpreter_tool import CodeInterpreterTool
@@ -11,10 +12,10 @@ from .exa_tools.exa_search_tool import EXASearchTool
from .file_read_tool.file_read_tool import FileReadTool
from .file_writer_tool.file_writer_tool import FileWriterTool
from .firecrawl_crawl_website_tool.firecrawl_crawl_website_tool import (
FirecrawlCrawlWebsiteTool
FirecrawlCrawlWebsiteTool,
)
from .firecrawl_scrape_website_tool.firecrawl_scrape_website_tool import (
FirecrawlScrapeWebsiteTool
FirecrawlScrapeWebsiteTool,
)
from .firecrawl_search_tool.firecrawl_search_tool import FirecrawlSearchTool
from .github_search_tool.github_search_tool import GithubSearchTool
@@ -22,16 +23,17 @@ from .json_search_tool.json_search_tool import JSONSearchTool
from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
from .multion_tool.multion_tool import MultiOnTool
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
from .nl2sql.nl2sql_tool import NL2SQLTool
from .pdf_search_tool.pdf_search_tool import PDFSearchTool
from .pg_seach_tool.pg_search_tool import PGSearchTool
from .rag.rag_tool import RagTool
from .scrape_element_from_website.scrape_element_from_website import (
ScrapeElementFromWebsiteTool
ScrapeElementFromWebsiteTool,
)
from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
from .scrapfly_scrape_website_tool.scrapfly_scrape_website_tool import (
ScrapflyScrapeWebsiteTool
ScrapflyScrapeWebsiteTool,
)
from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
from .serper_dev_tool.serper_dev_tool import SerperDevTool
@@ -46,7 +48,7 @@ from .vision_tool.vision_tool import VisionTool
from .website_search.website_search_tool import WebsiteSearchTool
from .xml_search_tool.xml_search_tool import XMLSearchTool
from .youtube_channel_search_tool.youtube_channel_search_tool import (
YoutubeChannelSearchTool
YoutubeChannelSearchTool,
)
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
from .weaviate_tool.vector_search import WeaviateVectorSearchTool

View File

@@ -1,59 +0,0 @@
from typing import Any, Callable
from pydantic import BaseModel as PydanticBaseModel
from crewai.tools.base_tool import BaseTool
from crewai.tools.structured_tool import CrewStructuredTool
class Tool(BaseTool):
func: Callable
"""The function that will be executed when the tool is called."""
def _run(self, *args: Any, **kwargs: Any) -> Any:
return self.func(*args, **kwargs)
def to_langchain(
tools: list[BaseTool | CrewStructuredTool],
) -> list[CrewStructuredTool]:
return [t.to_structured_tool() if isinstance(t, BaseTool) else t for t in tools]
def tool(*args):
"""
Decorator to create a tool from a function.
"""
def _make_with_name(tool_name: str) -> Callable:
def _make_tool(f: Callable) -> BaseTool:
if f.__doc__ is None:
raise ValueError("Function must have a docstring")
if f.__annotations__ is None:
raise ValueError("Function must have type annotations")
class_name = "".join(tool_name.split()).title()
args_schema = type(
class_name,
(PydanticBaseModel,),
{
"__annotations__": {
k: v for k, v in f.__annotations__.items() if k != "return"
},
},
)
return Tool(
name=tool_name,
description=f.__doc__,
func=f,
args_schema=args_schema,
)
return _make_tool
if len(args) == 1 and callable(args[0]):
return _make_with_name(args[0].__name__)(args[0])
if len(args) == 1 and isinstance(args[0], str):
return _make_with_name(args[0])
raise ValueError("Invalid arguments")

View File

@@ -0,0 +1,30 @@
# BraveSearchTool Documentation
## Description
This tool performs a web search for a specified query across the internet. It utilizes the Brave Web Search API, a REST API for querying Brave Search and retrieving search results from the web. The following sections describe how to construct requests to the Brave Web Search API, including parameters and headers, and receive a JSON response.
## Installation
To incorporate this tool into your project, follow the installation instructions below:
```shell
pip install 'crewai[tools]'
```
## Example
The following example demonstrates how to initialize the tool and execute a search with a given query:
```python
from crewai_tools import BraveSearchTool

# Initialize the tool for internet searching capabilities
tool = BraveSearchTool()

# Execute a search with a given query
results = tool.run(search_query="latest AI developments")
print(results)
```
## Steps to Get Started
To effectively use the `BraveSearchTool`, follow these steps:
1. **Package Installation**: Confirm that the `crewai[tools]` package is installed in your Python environment.
2. **API Key Acquisition**: Acquire an API key [here](https://api.search.brave.com/app/keys).
3. **Environment Configuration**: Store your obtained API key in an environment variable named `BRAVE_API_KEY` to facilitate its use by the tool.
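With the key in place, the fields defined on the tool class (`country`, `n_results`, `save_file`, shown in the implementation below) can also be set at construction time. A minimal sketch with illustrative values:
```python
from crewai_tools import BraveSearchTool

# Requires BRAVE_API_KEY to be set in the environment.
# Field values here are illustrative.
tool = BraveSearchTool(
    country="US",     # restrict results to a specific country
    n_results=5,      # number of results to request
    save_file=True,   # also write results to a timestamped .txt file
)
print(tool.run(search_query="CrewAI agent frameworks"))
```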
## Conclusion
By integrating the `BraveSearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.

View File

@@ -0,0 +1,118 @@
from datetime import datetime
import os
import time
from typing import Any, ClassVar, Optional, Type
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
def _save_results_to_file(content: str) -> None:
"""Saves the search results to a file."""
filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
with open(filename, "w") as file:
file.write(content)
print(f"Results saved to {filename}")
class BraveSearchToolSchema(BaseModel):
"""Input for BraveSearchTool."""
search_query: str = Field(
..., description="Mandatory search query you want to use to search the internet"
)
class BraveSearchTool(BaseTool):
"""
BraveSearchTool - A tool for performing web searches using the Brave Search API.
This module provides functionality to search the internet using Brave's Search API,
supporting customizable result counts and country-specific searches.
Dependencies:
- requests
- pydantic
- python-dotenv (for API key management)
"""
name: str = "Brave Web Search the internet"
description: str = (
"A tool that can be used to search the internet with a search_query."
)
args_schema: Type[BaseModel] = BraveSearchToolSchema
search_url: str = "https://api.search.brave.com/res/v1/web/search"
country: Optional[str] = ""
n_results: int = 10
save_file: bool = False
_last_request_time: ClassVar[float] = 0
_min_request_interval: ClassVar[float] = 1.0 # seconds
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if "BRAVE_API_KEY" not in os.environ:
raise ValueError(
"BRAVE_API_KEY environment variable is required for BraveSearchTool"
)
def _run(
self,
**kwargs: Any,
) -> Any:
current_time = time.time()
if (current_time - self._last_request_time) < self._min_request_interval:
time.sleep(
self._min_request_interval - (current_time - self._last_request_time)
)
BraveSearchTool._last_request_time = time.time()
try:
search_query = kwargs.get("search_query") or kwargs.get("query")
if not search_query:
raise ValueError("Search query is required")
save_file = kwargs.get("save_file", self.save_file)
n_results = kwargs.get("n_results", self.n_results)
payload = {"q": search_query, "count": n_results}
if self.country != "":
payload["country"] = self.country
headers = {
"X-Subscription-Token": os.environ["BRAVE_API_KEY"],
"Accept": "application/json",
}
response = requests.get(self.search_url, headers=headers, params=payload)
response.raise_for_status() # Handle non-200 responses
results = response.json()
if "web" in results:
results = results["web"]["results"]
string = []
for result in results:
try:
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {result['url']}",
f"Snippet: {result['description']}",
"---",
]
)
)
except KeyError:
continue
content = "\n".join(string)
except requests.RequestException as e:
return f"Error performing search: {str(e)}"
except KeyError as e:
return f"Error parsing search results: {str(e)}"
if save_file:
_save_results_to_file(content)
return f"\nSearch results: {content}\n"
else:
return content
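The class-level throttle at the top of `_run` keeps at least `_min_request_interval` seconds between any two requests, shared across all instances via `ClassVar` state. A stripped-down sketch of the same pattern:
```python
import time
from typing import ClassVar

class Throttled:
    _last_request_time: ClassVar[float] = 0
    _min_request_interval: ClassVar[float] = 1.0  # seconds

    def call(self) -> None:
        elapsed = time.time() - Throttled._last_request_time
        if elapsed < self._min_request_interval:
            # Sleep just long enough to honor the minimum interval
            time.sleep(self._min_request_interval - elapsed)
        Throttled._last_request_time = time.time()
        # ... the actual request would go here ...
```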

View File

@@ -1,9 +1,8 @@
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class BrowserbaseLoadToolSchema(BaseModel):
url: str = Field(description="Website URL")

View File

@@ -3,10 +3,9 @@ import os
from typing import List, Optional, Type
import docker
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class CodeInterpreterSchema(BaseModel):
"""Input for CodeInterpreterTool."""

View File

@@ -5,8 +5,7 @@ Composio tools wrapper.
import typing as t
import typing_extensions as te
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
class ComposioTool(BaseTool):

View File

@@ -1,11 +1,10 @@
import json
from typing import Type
from crewai.tools import BaseTool
from openai import OpenAI
from pydantic import BaseModel
from crewai_tools.tools.base_tool import BaseTool
class ImagePromptSchema(BaseModel):
"""Input for Dall-E Tool."""

View File

@@ -1,10 +1,9 @@
import os
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedDirectoryReadToolSchema(BaseModel):
"""Input for DirectoryReadTool."""

View File

@@ -1,10 +1,8 @@
import os
from typing import Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class EXABaseToolToolSchema(BaseModel):
"""Input for EXABaseTool."""

View File

@@ -1,28 +1,30 @@
import os
import requests
from typing import Any
import requests
from .exa_base_tool import EXABaseTool
class EXASearchTool(EXABaseTool):
def _run(
self,
**kwargs: Any,
) -> Any:
search_query = kwargs.get('search_query')
if search_query is None:
search_query = kwargs.get('query')
def _run(
self,
**kwargs: Any,
) -> Any:
search_query = kwargs.get("search_query")
if search_query is None:
search_query = kwargs.get("query")
payload = {
"query": search_query,
"type": "magic",
}
payload = {
"query": search_query,
"type": "magic",
}
headers = self.headers.copy()
headers["x-api-key"] = os.environ['EXA_API_KEY']
headers = self.headers.copy()
headers["x-api-key"] = os.environ["EXA_API_KEY"]
response = requests.post(self.search_url, json=payload, headers=headers)
results = response.json()
if 'results' in results:
results = super()._parse_results(results['results'])
return results
response = requests.post(self.search_url, json=payload, headers=headers)
results = response.json()
if "results" in results:
results = super()._parse_results(results["results"])
return results

View File

@@ -1,9 +1,8 @@
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedFileReadToolSchema(BaseModel):
"""Input for FileReadTool."""

View File

@@ -1,16 +1,18 @@
import os
from typing import Any, Optional, Type
from pydantic import BaseModel
from ..base_tool import BaseTool
from distutils.util import strtobool
from typing import Any, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel
class FileWriterToolInput(BaseModel):
filename: str
filename: str
directory: Optional[str] = "./"
overwrite: str = "False"
content: str
class FileWriterTool(BaseTool):
name: str = "File Writer Tool"
description: str = (
@@ -26,7 +28,7 @@ class FileWriterTool(BaseTool):
# Construct the full path
filepath = os.path.join(kwargs.get("directory") or "", kwargs["filename"])
# Convert overwrite to boolean
kwargs["overwrite"] = bool(strtobool(kwargs["overwrite"]))
@@ -46,4 +48,4 @@ class FileWriterTool(BaseTool):
except KeyError as e:
return f"An error occurred while accessing key: {str(e)}"
except Exception as e:
return f"An error occurred while writing to the file: {str(e)}"
return f"An error occurred while writing to the file: {str(e)}"

View File

@@ -1,8 +1,7 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field
# Type checking import
if TYPE_CHECKING:
@@ -20,6 +19,9 @@ class FirecrawlCrawlWebsiteToolSchema(BaseModel):
class FirecrawlCrawlWebsiteTool(BaseTool):
model_config = ConfigDict(
arbitrary_types_allowed=True, validate_assignment=True, frozen=False
)
name: str = "Firecrawl web crawl tool"
description: str = "Crawl webpages using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema
@@ -50,3 +52,15 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
return self.firecrawl.crawl_url(url, options)
try:
from firecrawl import FirecrawlApp
# Must rebuild model after class is defined
FirecrawlCrawlWebsiteTool.model_rebuild()
except ImportError:
"""
When this tool is not used, the exception can be ignored.
"""
pass

View File

@@ -1,8 +1,7 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
from pydantic import BaseModel, ConfigDict, Field
# Type checking import
if TYPE_CHECKING:
@@ -24,6 +23,9 @@ class FirecrawlScrapeWebsiteToolSchema(BaseModel):
class FirecrawlScrapeWebsiteTool(BaseTool):
model_config = ConfigDict(
arbitrary_types_allowed=True, validate_assignment=True, frozen=False
)
name: str = "Firecrawl web scrape tool"
description: str = "Scrape webpages url using Firecrawl and return the contents"
args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema
@@ -61,3 +63,15 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
"timeout": timeout,
}
return self.firecrawl.scrape_url(url, options)
try:
from firecrawl import FirecrawlApp
# Must rebuild model after class is defined
FirecrawlScrapeWebsiteTool.model_rebuild()
except ImportError:
"""
When this tool is not used, the exception can be ignored.
"""
pass

View File

@@ -1,9 +1,8 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
# Type checking import
if TYPE_CHECKING:
from firecrawl import FirecrawlApp

View File

@@ -0,0 +1,38 @@
# JinaScrapeWebsiteTool
## Description
A tool designed to extract and read the content of a specified website using the Jina.ai reader. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
## Installation
Install the crewai_tools package
```shell
pip install 'crewai[tools]'
```
## Example
```python
from crewai_tools import JinaScrapeWebsiteTool
# To enable scraping any website it finds during its execution
tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY')
# Initialize the tool with the website URL, so the agent can only scrape the content of the specified website
tool = JinaScrapeWebsiteTool(website_url='https://www.example.com')
# With custom headers
tool = JinaScrapeWebsiteTool(
website_url='https://www.example.com',
custom_headers={'X-Target-Selector': 'body, .class, #id'}
)
```
## Authentication
The tool uses Jina.ai's reader service. While it can work without an API key, Jina.ai may apply rate limiting or blocking to unauthenticated requests. For production use, it's recommended to provide an API key.
## Arguments
- `website_url`: Mandatory website URL to read. This is the primary input for the tool, specifying which website's content should be scraped and read.
- `api_key`: Optional Jina.ai API key for authenticated access to the reader service.
- `custom_headers`: Optional dictionary of HTTP headers to use when making requests.
## Note
This tool is an alternative to the standard `ScrapeWebsiteTool` that specifically uses Jina.ai's reader service for enhanced content extraction. Choose this tool when you need more sophisticated content parsing capabilities.
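A minimal sketch of executing the tool once initialized (the URL and key are illustrative); the reader returns the page as markdown text:
```python
from crewai_tools import JinaScrapeWebsiteTool

tool = JinaScrapeWebsiteTool(api_key='YOUR_API_KEY')
markdown = tool.run(website_url='https://www.example.com')
print(markdown[:500])  # preview the returned markdown
```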

View File

@@ -0,0 +1,54 @@
from typing import Optional, Type
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
class JinaScrapeWebsiteToolInput(BaseModel):
"""Input schema for JinaScrapeWebsiteTool."""
website_url: str = Field(..., description="Mandatory website URL to read")
class JinaScrapeWebsiteTool(BaseTool):
name: str = "JinaScrapeWebsiteTool"
description: str = (
"A tool that can be used to read a website content using Jina.ai reader and return markdown content."
)
args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput
website_url: Optional[str] = None
api_key: Optional[str] = None
headers: dict = {}
def __init__(
self,
website_url: Optional[str] = None,
api_key: Optional[str] = None,
custom_headers: Optional[dict] = None,
**kwargs,
):
super().__init__(**kwargs)
if website_url is not None:
self.website_url = website_url
self.description = f"A tool that can be used to read {website_url}'s content and return markdown content."
self._generate_description()
if custom_headers is not None:
self.headers = custom_headers
if api_key is not None:
self.headers["Authorization"] = f"Bearer {api_key}"
def _run(self, website_url: Optional[str] = None) -> str:
url = website_url or self.website_url
if not url:
raise ValueError(
"Website URL must be provided either during initialization or execution"
)
response = requests.get(
f"https://r.jina.ai/{url}", headers=self.headers, timeout=15
)
response.raise_for_status()
return response.text

View File

@@ -1,9 +1,8 @@
from typing import Any, Optional, Type, cast
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class LlamaIndexTool(BaseTool):
"""Tool to wrap LlamaIndex tools/query engines."""

View File

@@ -2,7 +2,7 @@
from typing import Any, Optional
from crewai_tools.tools.base_tool import BaseTool
from crewai.tools import BaseTool
class MultiOnTool(BaseTool):

View File

@@ -1,11 +1,10 @@
from typing import Any, Union
from typing import Any, Type, Union
from ..base_tool import BaseTool
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from typing import Type, Any
class NL2SQLToolInput(BaseModel):
sql_query: str = Field(
@@ -13,6 +12,7 @@ class NL2SQLToolInput(BaseModel):
description="The SQL query to execute.",
)
class NL2SQLTool(BaseTool):
name: str = "NL2SQLTool"
description: str = "Converts natural language to SQL queries and executes them."

View File

@@ -1,10 +1,9 @@
from abc import ABC, abstractmethod
from typing import Any
from crewai.tools import BaseTool
from pydantic import BaseModel, Field, model_validator
from crewai_tools.tools.base_tool import BaseTool
class Adapter(BaseModel, ABC):
class Config:

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
import requests
from bs4 import BeautifulSoup
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
"""Input for ScrapeElementFromWebsiteTool."""

View File

@@ -1,12 +1,12 @@
import os
import re
from typing import Any, Optional, Type
import requests
from bs4 import BeautifulSoup
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedScrapeWebsiteToolSchema(BaseModel):
"""Input for ScrapeWebsiteTool."""
@@ -67,7 +67,7 @@ class ScrapeWebsiteTool(BaseTool):
page.encoding = page.apparent_encoding
parsed = BeautifulSoup(page.text, "html.parser")
text = parsed.get_text()
text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
text = " ".join([i for i in text.split(" ") if i.strip() != ""])
text = parsed.get_text(" ")
text = re.sub("[ \t]+", " ", text)
text = re.sub("\\s+\n\\s+", "\n", text)
return text
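The replacement normalization can be exercised in isolation; a minimal sketch applying the same two substitutions to illustrative markup:
```python
import re

from bs4 import BeautifulSoup

html = "<div><h1>Title</h1>\n\n  <p>Some   body\t text</p></div>"  # illustrative
parsed = BeautifulSoup(html, "html.parser")

text = parsed.get_text(" ")              # join text nodes with single spaces
text = re.sub("[ \t]+", " ", text)       # collapse runs of spaces and tabs
text = re.sub("\\s+\n\\s+", "\n", text)  # strip whitespace around newlines
print(text)
```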

View File

@@ -1,10 +1,9 @@
import logging
from typing import Any, Dict, Literal, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
logger = logging.getLogger(__file__)

View File

@@ -1,14 +1,12 @@
import time
from typing import Any, Optional, Type
from bs4 import BeautifulSoup
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from ..base_tool import BaseTool
class FixedSeleniumScrapingToolSchema(BaseModel):
"""Input for SeleniumScrapingTool."""

View File

@@ -5,16 +5,15 @@ import logging
from typing import Any, Type
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai.tools import BaseTool
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
def _save_results_to_file(content: str) -> None:
"""Saves the search results to a file."""
try:

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
from urllib.parse import urlencode
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
from urllib.parse import urlencode
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""

View File

@@ -3,10 +3,9 @@ from typing import Any, Optional, Type
from urllib.parse import urlencode
import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search."""

View File

@@ -1,9 +1,8 @@
from typing import Any, Dict, Literal, Optional, Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SpiderToolSchema(BaseModel):
url: str = Field(description="Website URL")

View File

@@ -2,11 +2,10 @@ import base64
from typing import Type
import requests
from crewai.tools import BaseTool
from openai import OpenAI
from pydantic import BaseModel
from crewai_tools.tools.base_tool import BaseTool
class ImagePromptSchema(BaseModel):
"""Input for Vision Tool."""

View File

@@ -0,0 +1,80 @@
# WeaviateVectorSearchTool
## Description
This tool is specifically crafted for conducting semantic searches over documents stored in a Weaviate vector database. Use it to find documents semantically similar to a given query.
Weaviate is a vector database that is used to store and query vector embeddings. You can follow their docs here: https://weaviate.io/developers/wcs/connect
## Installation
Install the crewai_tools package by executing the following command in your terminal:
```shell
uv pip install 'crewai[tools]'
```
## Example
To utilize the WeaviateVectorSearchTool for different use cases, follow these examples:
```python
from crewai import Agent
from crewai_tools import WeaviateVectorSearchTool
from weaviate.classes.config import Configure

# Initialize the tool with the target collection and cluster credentials
tool = WeaviateVectorSearchTool(
collection_name='example_collections',
limit=3,
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
weaviate_api_key="your-weaviate-api-key",
)
# or
# Setup custom model for vectorizer and generative model
tool = WeaviateVectorSearchTool(
collection_name='example_collections',
limit=3,
vectorizer=Configure.Vectorizer.text2vec_openai(model="nomic-embed-text"),
generative_model=Configure.Generative.openai(model="gpt-4o-mini"),
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
weaviate_api_key="your-weaviate-api-key",
)
# Adding the tool to an agent
rag_agent = Agent(
name="rag_agent",
role="You are a helpful assistant that can answer questions with the help of the WeaviateVectorSearchTool.",
llm="gpt-4o-mini",
tools=[tool],
)
```
## Arguments
- `collection_name`: The name of the collection to search within. (Required)
- `weaviate_cluster_url`: The URL of the Weaviate cluster. (Required)
- `weaviate_api_key`: The API key for the Weaviate cluster. (Required)
- `limit`: The number of results to return. (Optional)
- `vectorizer`: The vectorizer to use. (Optional)
- `generative_model`: The generative model to use. (Optional)
Preloading the Weaviate database with documents:
```python
import os

import weaviate
from crewai_tools import WeaviateVectorSearchTool
from weaviate.classes.init import Auth

# Connect to the Weaviate cluster (URL and key are illustrative); follow the
# Weaviate docs: https://weaviate.io/developers/wcs/connect
client = weaviate.connect_to_weaviate_cloud(
    cluster_url="https://your-weaviate-cluster-url.com",
    auth_credentials=Auth.api_key("your-weaviate-api-key"),
)

# Use before hooks to generate the documents and add them to the Weaviate database.
test_docs = client.collections.get("example_collections")
docs_to_load = os.listdir("knowledge")
with test_docs.batch.dynamic() as batch:
for d in docs_to_load:
with open(os.path.join("knowledge", d), "r") as f:
content = f.read()
batch.add_object(
{
"content": content,
"year": d.split("_")[0],
}
)
tool = WeaviateVectorSearchTool(
    collection_name='example_collections',
    limit=3,
    weaviate_cluster_url="https://your-weaviate-cluster-url.com",
    weaviate_api_key="your-weaviate-api-key",
)
```

View File

@@ -0,0 +1,89 @@
import os
import json
import weaviate
from pydantic import BaseModel, Field
from typing import Type, Optional
from crewai.tools import BaseTool
from weaviate.classes.config import Configure, Vectorizers
from weaviate.classes.init import Auth
class WeaviateToolSchema(BaseModel):
"""Input for WeaviateTool."""
query: str = Field(
...,
description="The query to search retrieve relevant information from the Weaviate database. Pass only the query, not the question.",
)
class WeaviateVectorSearchTool(BaseTool):
"""Tool to search the Weaviate database"""
name: str = "WeaviateVectorSearchTool"
description: str = "A tool to search the Weaviate database for relevant information on internal documents."
args_schema: Type[BaseModel] = WeaviateToolSchema
query: Optional[str] = None
vectorizer: Optional[Vectorizers] = Field(
default=Configure.Vectorizer.text2vec_openai(
model="nomic-embed-text",
)
)
generative_model: Optional[str] = Field(
default=Configure.Generative.openai(
model="gpt-4o",
),
)
collection_name: Optional[str] = None
limit: Optional[int] = Field(default=3)
headers: Optional[dict] = Field(
default={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
)
weaviate_cluster_url: str = Field(
...,
description="The URL of the Weaviate cluster",
)
weaviate_api_key: str = Field(
...,
description="The API key for the Weaviate cluster",
)
def _run(self, query: str) -> str:
"""Search the Weaviate database
Args:
query (str): The query used to retrieve relevant information from the Weaviate database. Pass only the query as a string, not the question.
Returns:
str: The result of the search query
"""
if not self.weaviate_cluster_url or not self.weaviate_api_key:
raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set")
client = weaviate.connect_to_weaviate_cloud(
cluster_url=self.weaviate_cluster_url,
auth_credentials=Auth.api_key(self.weaviate_api_key),
headers=self.headers,
)
internal_docs = client.collections.get(self.collection_name)
if not internal_docs:
internal_docs = client.collections.create(
name=self.collection_name,
vectorizer_config=self.vectorizer,
generative_config=self.generative_model,
)
response = internal_docs.query.near_text(
query=query,
limit=self.limit,
)
json_response = ""
for obj in response.objects:
json_response += json.dumps(obj.properties, indent=2)
client.close()
return json_response

View File

@@ -0,0 +1,50 @@
from unittest.mock import patch
import pytest
from crewai_tools.tools.brave_search_tool.brave_search_tool import BraveSearchTool
@pytest.fixture
def brave_tool():
return BraveSearchTool(n_results=2)
def test_brave_tool_initialization():
tool = BraveSearchTool()
assert tool.n_results == 10
assert tool.save_file is False
@patch("requests.get")
def test_brave_tool_search(mock_get, brave_tool):
mock_response = {
"web": {
"results": [
{
"title": "Test Title",
"url": "http://test.com",
"description": "Test Description",
}
]
}
}
mock_get.return_value.json.return_value = mock_response
result = brave_tool.run(search_query="test")
assert "Test Title" in result
assert "http://test.com" in result
def test_brave_tool():
tool = BraveSearchTool(
n_results=2,
)
x = tool.run(search_query="ChatGPT")
print(x)
if __name__ == "__main__":
test_brave_tool()
test_brave_tool_initialization()
# test_brave_tool_search(brave_tool)