Merge pull request #101 from crewAIInc/brandon/cre-250-llamaindex-pydantic-v2

Pydantic v1 issues
Rip&Tear
2024-09-13 21:53:32 +08:00
committed by GitHub
39 changed files with 752 additions and 550 deletions

View File

@@ -3,11 +3,11 @@ from typing import Any, Callable, Optional, Type
from langchain_core.tools import StructuredTool
from pydantic import BaseModel, ConfigDict, Field, validator
from pydantic.v1 import BaseModel as V1BaseModel
from pydantic import BaseModel as PydanticBaseModel
class BaseTool(BaseModel, ABC):
class _ArgsSchemaPlaceholder(V1BaseModel):
class _ArgsSchemaPlaceholder(PydanticBaseModel):
pass
model_config = ConfigDict()
@@ -16,7 +16,7 @@ class BaseTool(BaseModel, ABC):
"""The unique name of the tool that clearly communicates its purpose."""
description: str
"""Used to tell the model how/when/why to use the tool."""
args_schema: Type[V1BaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
args_schema: Type[PydanticBaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
"""The schema for the arguments that the tool accepts."""
description_updated: bool = False
"""Flag to check if the description has been updated."""
@@ -26,13 +26,15 @@ class BaseTool(BaseModel, ABC):
"""Flag to check if the tool should be the final agent answer."""
@validator("args_schema", always=True, pre=True)
def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
def _default_args_schema(
cls, v: Type[PydanticBaseModel]
) -> Type[PydanticBaseModel]:
if not isinstance(v, cls._ArgsSchemaPlaceholder):
return v
return type(
f"{cls.__name__}Schema",
(V1BaseModel,),
(PydanticBaseModel,),
{
"__annotations__": {
k: v for k, v in cls._run.__annotations__.items() if k != "return"
@@ -75,7 +77,7 @@ class BaseTool(BaseModel, ABC):
class_name = f"{self.__class__.__name__}Schema"
self.args_schema = type(
class_name,
(V1BaseModel,),
(PydanticBaseModel,),
{
"__annotations__": {
k: v
@@ -127,7 +129,7 @@ def tool(*args):
class_name = "".join(tool_name.split()).title()
args_schema = type(
class_name,
(V1BaseModel,),
(PydanticBaseModel,),
{
"__annotations__": {
k: v for k, v in f.__annotations__.items() if k != "return"
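The hunks above are the core of the change: the args_schema machinery on BaseTool drops the pydantic.v1 alias (V1BaseModel) and builds generated schemas on the standard pydantic v2 BaseModel instead. As a rough sketch of the pattern downstream tools follow after this change, a subclass would look roughly like the following; GreetTool and GreetToolSchema are made-up names for illustration, not part of the diff:

from typing import Type

from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class GreetToolSchema(BaseModel):
    """Hypothetical args schema, now a plain pydantic v2 model."""

    name: str = Field(..., description="Name of the person to greet")


class GreetTool(BaseTool):
    name: str = "Greet someone"
    description: str = "Returns a short greeting for the given name."
    args_schema: Type[BaseModel] = GreetToolSchema

    def _run(self, name: str) -> str:
        return f"Hello, {name}!"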

View File

@@ -1,13 +1,19 @@
from typing import Optional, Any, Type
from pydantic.v1 import BaseModel, Field
from typing import Any, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class BrowserbaseLoadToolSchema(BaseModel):
url: str = Field(description="Website URL")
class BrowserbaseLoadTool(BaseTool):
name: str = "Browserbase web load tool"
description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
description: str = (
"Load webpages url in a headless browser using Browserbase and return the contents"
)
args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
api_key: Optional[str] = None
project_id: Optional[str] = None

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool

View File

@@ -3,8 +3,9 @@ import os
from typing import List, Optional, Type
import docker
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
from pydantic.v1 import BaseModel, Field
class CodeInterpreterSchema(BaseModel):

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool

View File

@@ -1,9 +1,10 @@
import json
from typing import Type
from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
from pydantic.v1 import BaseModel
from pydantic import BaseModel
from crewai_tools.tools.base_tool import BaseTool
class ImagePromptSchema(BaseModel):

View File

@@ -1,38 +1,50 @@
import os
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from typing import Any, Optional, Type
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedDirectoryReadToolSchema(BaseModel):
"""Input for DirectoryReadTool."""
pass
"""Input for DirectoryReadTool."""
pass
class DirectoryReadToolSchema(FixedDirectoryReadToolSchema):
"""Input for DirectoryReadTool."""
directory: str = Field(..., description="Mandatory directory to list content")
"""Input for DirectoryReadTool."""
directory: str = Field(..., description="Mandatory directory to list content")
class DirectoryReadTool(BaseTool):
name: str = "List files in directory"
description: str = "A tool that can be used to recursively list a directory's content."
args_schema: Type[BaseModel] = DirectoryReadToolSchema
directory: Optional[str] = None
name: str = "List files in directory"
description: str = (
"A tool that can be used to recursively list a directory's content."
)
args_schema: Type[BaseModel] = DirectoryReadToolSchema
directory: Optional[str] = None
def __init__(self, directory: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if directory is not None:
self.directory = directory
self.description = f"A tool that can be used to list {directory}'s content."
self.args_schema = FixedDirectoryReadToolSchema
self._generate_description()
def _run(
self,
**kwargs: Any,
) -> Any:
directory = kwargs.get('directory', self.directory)
if directory[-1] == "/":
directory = directory[:-1]
files_list = [f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}" for root, dirs, files in os.walk(directory) for filename in files]
files = "\n- ".join(files_list)
return f"File paths: \n-{files}"
def __init__(self, directory: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if directory is not None:
self.directory = directory
self.description = f"A tool that can be used to list {directory}'s content."
self.args_schema = FixedDirectoryReadToolSchema
self._generate_description()
def _run(
self,
**kwargs: Any,
) -> Any:
directory = kwargs.get("directory", self.directory)
if directory[-1] == "/":
directory = directory[:-1]
files_list = [
f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}"
for root, dirs, files in os.walk(directory)
for filename in files
]
files = "\n- ".join(files_list)
return f"File paths: \n-{files}"

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.loaders.directory_loader import DirectoryLoader
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool

View File

@@ -1,26 +1,32 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool
class FixedDOCXSearchToolSchema(BaseModel):
"""Input for DOCXSearchTool."""
docx: Optional[str] = Field(..., description="Mandatory docx path you want to search")
docx: Optional[str] = Field(
..., description="Mandatory docx path you want to search"
)
search_query: str = Field(
...,
description="Mandatory search query you want to use to search the DOCX's content",
)
class DOCXSearchToolSchema(FixedDOCXSearchToolSchema):
"""Input for DOCXSearchTool."""
search_query: str = Field(
...,
description="Mandatory search query you want to use to search the DOCX's content",
)
class DOCXSearchTool(RagTool):
name: str = "Search a DOCX's content"
description: str = (
@@ -56,9 +62,9 @@ class DOCXSearchTool(RagTool):
self,
**kwargs: Any,
) -> Any:
search_query = kwargs.get('search_query')
search_query = kwargs.get("search_query")
if search_query is None:
search_query = kwargs.get('query')
search_query = kwargs.get("query")
docx = kwargs.get("docx")
if docx is not None:

View File

@@ -1,36 +1,49 @@
import os
from typing import Type
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class EXABaseToolToolSchema(BaseModel):
"""Input for EXABaseTool."""
search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
"""Input for EXABaseTool."""
search_query: str = Field(
..., description="Mandatory search query you want to use to search the internet"
)
class EXABaseTool(BaseTool):
name: str = "Search the internet"
description: str = "A tool that can be used to search the internet from a search_query"
args_schema: Type[BaseModel] = EXABaseToolToolSchema
search_url: str = "https://api.exa.ai/search"
n_results: int = None
headers: dict = {
"accept": "application/json",
"content-type": "application/json",
}
name: str = "Search the internet"
description: str = (
"A tool that can be used to search the internet from a search_query"
)
args_schema: Type[BaseModel] = EXABaseToolToolSchema
search_url: str = "https://api.exa.ai/search"
n_results: int = None
headers: dict = {
"accept": "application/json",
"content-type": "application/json",
}
def _parse_results(self, results):
stirng = []
for result in results:
try:
stirng.append('\n'.join([
f"Title: {result['title']}",
f"Score: {result['score']}",
f"Url: {result['url']}",
f"ID: {result['id']}",
"---"
]))
except KeyError:
next
def _parse_results(self, results):
stirng = []
for result in results:
try:
stirng.append(
"\n".join(
[
f"Title: {result['title']}",
f"Score: {result['score']}",
f"Url: {result['url']}",
f"ID: {result['id']}",
"---",
]
)
)
except KeyError:
next
content = '\n'.join(stirng)
return f"\nSearch results: {content}\n"
content = "\n".join(stirng)
return f"\nSearch results: {content}\n"

View File

@@ -1,19 +1,20 @@
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from typing import Any, Optional, Type
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedFileReadToolSchema(BaseModel):
"""Input for FileReadTool."""
pass
class FileReadToolSchema(FixedFileReadToolSchema):
"""Input for FileReadTool."""
file_path: str = Field(
...,
description="Mandatory file full path to read the file"
)
file_path: str = Field(..., description="Mandatory file full path to read the file")
class FileReadTool(BaseTool):
@@ -22,11 +23,7 @@ class FileReadTool(BaseTool):
args_schema: Type[BaseModel] = FileReadToolSchema
file_path: Optional[str] = None
def __init__(
self,
file_path: Optional[str] = None,
**kwargs
):
def __init__(self, file_path: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if file_path is not None:
self.file_path = file_path
@@ -39,8 +36,8 @@ class FileReadTool(BaseTool):
**kwargs: Any,
) -> Any:
try:
file_path = kwargs.get('file_path', self.file_path)
with open(file_path, 'r') as file:
file_path = kwargs.get("file_path", self.file_path)
with open(file_path, "r") as file:
return file.read()
except Exception as e:
return f"Fail to read the file {file_path}. Error: {e}"

View File

@@ -1,39 +1,46 @@
import os
from typing import Optional, Type, Any
from typing import Any, Optional, Type
from pydantic import BaseModel
from pydantic.v1 import BaseModel
from ..base_tool import BaseTool
class FileWriterToolInput(BaseModel):
filename: str
content: str
directory: Optional[str] = None
overwrite: bool = False
class FileWriterTool(BaseTool):
name: str = "File Writer Tool"
description: str = "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
description: str = (
"A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
)
args_schema: Type[BaseModel] = FileWriterToolInput
def _run(self, **kwargs: Any) -> str:
try:
# Create the directory if it doesn't exist
if kwargs['directory'] and not os.path.exists(kwargs['directory']):
os.makedirs(kwargs['directory'])
if kwargs["directory"] and not os.path.exists(kwargs["directory"]):
os.makedirs(kwargs["directory"])
# Construct the full path
filepath = os.path.join(kwargs['directory'] or '', kwargs['filename'])
filepath = os.path.join(kwargs["directory"] or "", kwargs["filename"])
# Check if file exists and overwrite is not allowed
if os.path.exists(filepath) and not kwargs['overwrite']:
if os.path.exists(filepath) and not kwargs["overwrite"]:
return f"File {filepath} already exists and overwrite option was not passed."
# Write content to the file
mode = 'w' if kwargs['overwrite'] else 'x'
mode = "w" if kwargs["overwrite"] else "x"
with open(filepath, mode) as file:
file.write(kwargs['content'])
file.write(kwargs["content"])
return f"Content successfully written to {filepath}"
except FileExistsError:
return f"File {filepath} already exists and overwrite option was not passed."
return (
f"File {filepath} already exists and overwrite option was not passed."
)
except Exception as e:
return f"An error occurred while writing to the file: {str(e)}"

View File

@@ -1,11 +1,19 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from typing import Any, Dict, List, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlCrawlWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
crawler_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for crawling")
page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page")
crawler_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for crawling"
)
page_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for page"
)
class FirecrawlCrawlWebsiteTool(BaseTool):
name: str = "Firecrawl web crawl tool"
@@ -17,22 +25,24 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from firecrawl import FirecrawlApp # type: ignore
from firecrawl import FirecrawlApp # type: ignore
except ImportError:
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
self.firecrawl = FirecrawlApp(api_key=api_key)
def _run(self, url: str, crawler_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None):
if (crawler_options is None):
def _run(
self,
url: str,
crawler_options: Optional[Dict[str, Any]] = None,
page_options: Optional[Dict[str, Any]] = None,
):
if crawler_options is None:
crawler_options = {}
if (page_options is None):
if page_options is None:
page_options = {}
options = {
"crawlerOptions": crawler_options,
"pageOptions": page_options
}
return self.firecrawl.crawl_url(url, options)
options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
return self.firecrawl.crawl_url(url, options)

View File

@@ -1,12 +1,23 @@
from typing import Optional, Any, Type, Dict
from pydantic.v1 import BaseModel, Field
from typing import Any, Dict, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlScrapeWebsiteToolSchema(BaseModel):
url: str = Field(description="Website URL")
page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
timeout: Optional[int] = Field(default=None, description="Timeout in milliseconds for the scraping operation. The default value is 30000.")
page_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for page scraping"
)
extractor_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for data extraction"
)
timeout: Optional[int] = Field(
default=None,
description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
)
class FirecrawlScrapeWebsiteTool(BaseTool):
name: str = "Firecrawl web scrape tool"
@@ -18,15 +29,21 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from firecrawl import FirecrawlApp # type: ignore
from firecrawl import FirecrawlApp # type: ignore
except ImportError:
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
self.firecrawl = FirecrawlApp(api_key=api_key)
def _run(self, url: str, page_options: Optional[Dict[str, Any]] = None, extractor_options: Optional[Dict[str, Any]] = None, timeout: Optional[int] = None):
def _run(
self,
url: str,
page_options: Optional[Dict[str, Any]] = None,
extractor_options: Optional[Dict[str, Any]] = None,
timeout: Optional[int] = None,
):
if page_options is None:
page_options = {}
if extractor_options is None:
@@ -37,6 +54,6 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
options = {
"pageOptions": page_options,
"extractorOptions": extractor_options,
"timeout": timeout
"timeout": timeout,
}
return self.firecrawl.scrape_url(url, options)
return self.firecrawl.scrape_url(url, options)

View File

@@ -1,11 +1,19 @@
from typing import Optional, Any, Type, Dict, List
from pydantic.v1 import BaseModel, Field
from typing import Any, Dict, List, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class FirecrawlSearchToolSchema(BaseModel):
query: str = Field(description="Search query")
page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for result formatting")
search_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for searching")
page_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for result formatting"
)
search_options: Optional[Dict[str, Any]] = Field(
default=None, description="Options for searching"
)
class FirecrawlSearchTool(BaseTool):
name: str = "Firecrawl web search tool"
@@ -17,22 +25,24 @@ class FirecrawlSearchTool(BaseTool):
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from firecrawl import FirecrawlApp # type: ignore
from firecrawl import FirecrawlApp # type: ignore
except ImportError:
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
raise ImportError(
"`firecrawl` package not found, please run `pip install firecrawl-py`"
)
self.firecrawl = FirecrawlApp(api_key=api_key)
def _run(self, query: str, page_options: Optional[Dict[str, Any]] = None, result_options: Optional[Dict[str, Any]] = None):
if (page_options is None):
def _run(
self,
query: str,
page_options: Optional[Dict[str, Any]] = None,
result_options: Optional[Dict[str, Any]] = None,
):
if page_options is None:
page_options = {}
if (result_options is None):
if result_options is None:
result_options = {}
options = {
"pageOptions": page_options,
"resultOptions": result_options
}
options = {"pageOptions": page_options, "resultOptions": result_options}
return self.firecrawl.search(query, options)

View File

@@ -1,7 +1,7 @@
from typing import Any, List, Optional, Type
from embedchain.loaders.github import GithubLoader
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool
@@ -27,7 +27,9 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema):
class GithubSearchTool(RagTool):
name: str = "Search a github repo's content"
description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
description: str = (
"A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
)
summarize: bool = False
gh_token: str
args_schema: Type[BaseModel] = GithubSearchToolSchema

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool

View File

@@ -1,50 +1,48 @@
import os
import json
import requests
from typing import Any, Optional, Type, cast
from pydantic import BaseModel, Field
from typing import Type, Any, cast, Optional
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class LlamaIndexTool(BaseTool):
"""Tool to wrap LlamaIndex tools/query engines."""
llama_index_tool: Any
def _run(
self,
self,
*args: Any,
**kwargs: Any,
) -> Any:
**kwargs: Any,
) -> Any:
"""Run tool."""
from llama_index.core.tools import BaseTool as LlamaBaseTool
tool = cast(LlamaBaseTool, self.llama_index_tool)
return tool(*args, **kwargs)
@classmethod
def from_tool(
cls,
tool: Any,
**kwargs: Any
) -> "LlamaIndexTool":
def from_tool(cls, tool: Any, **kwargs: Any) -> "LlamaIndexTool":
from llama_index.core.tools import BaseTool as LlamaBaseTool
if not isinstance(tool, LlamaBaseTool):
raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
tool = cast(LlamaBaseTool, tool)
if tool.metadata.fn_schema is None:
raise ValueError("The LlamaIndex tool does not have an fn_schema specified.")
raise ValueError(
"The LlamaIndex tool does not have an fn_schema specified."
)
args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)
return cls(
name=tool.metadata.name,
description=tool.metadata.description,
args_schema=args_schema,
llama_index_tool=tool,
**kwargs
**kwargs,
)
@classmethod
def from_query_engine(
cls,
@@ -52,7 +50,7 @@ class LlamaIndexTool(BaseTool):
name: Optional[str] = None,
description: Optional[str] = None,
return_direct: bool = False,
**kwargs: Any
**kwargs: Any,
) -> "LlamaIndexTool":
from llama_index.core.query_engine import BaseQueryEngine
from llama_index.core.tools import QueryEngineTool
@@ -60,10 +58,11 @@ class LlamaIndexTool(BaseTool):
if not isinstance(query_engine, BaseQueryEngine):
raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")
# NOTE: by default the schema expects an `input` variable. However this
# NOTE: by default the schema expects an `input` variable. However this
# confuses crewAI so we are renaming to `query`.
class QueryToolSchema(BaseModel):
"""Schema for query tool."""
query: str = Field(..., description="Search query for the query tool.")
# NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`
@@ -72,13 +71,9 @@ class LlamaIndexTool(BaseTool):
name=name,
description=description,
return_direct=return_direct,
resolve_input_errors=True,
resolve_input_errors=True,
)
# HACK: we are replacing the schema with our custom schema
query_engine_tool.metadata.fn_schema = QueryToolSchema
return cls.from_tool(
query_engine_tool,
**kwargs
)
return cls.from_tool(query_engine_tool, **kwargs)
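This is the file the PR title refers to. A hedged end-to-end sketch of how the wrapper is used after the pydantic v2 switch; building the index from a local ./docs folder is illustrative, and only from_tool and from_query_engine come from the code above:

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

from crewai_tools import LlamaIndexTool

# Build any LlamaIndex query engine (the ./docs folder is an assumption).
documents = SimpleDirectoryReader("./docs").load_data()
query_engine = VectorStoreIndex.from_documents(documents).as_query_engine()

# Wrap it as a crewAI tool; the injected QueryToolSchema exposes a single `query` field.
tool = LlamaIndexTool.from_query_engine(
    query_engine,
    name="Docs QA",
    description="Answers questions about the local documentation.",
)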

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool

View File

@@ -1,7 +1,7 @@
from typing import Any, Type
from embedchain.loaders.mysql import MySQLLoader
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool
@@ -17,7 +17,9 @@ class MySQLSearchToolSchema(BaseModel):
class MySQLSearchTool(RagTool):
name: str = "Search a database's table content"
description: str = "A tool that can be used to semantic search a query from a database table's content."
description: str = (
"A tool that can be used to semantic search a query from a database table's content."
)
args_schema: Type[BaseModel] = MySQLSearchToolSchema
db_uri: str = Field(..., description="Mandatory database URI")

View File

@@ -1,8 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic import model_validator
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field, model_validator
from ..rag.rag_tool import RagTool

View File

@@ -1,7 +1,7 @@
from typing import Any, Type
from embedchain.loaders.postgres import PostgresLoader
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool
@@ -17,7 +17,9 @@ class PGSearchToolSchema(BaseModel):
class PGSearchTool(RagTool):
name: str = "Search a database's table content"
description: str = "A tool that can be used to semantic search a query from a database table's content."
description: str = (
"A tool that can be used to semantic search a query from a database table's content."
)
args_schema: Type[BaseModel] = PGSearchToolSchema
db_uri: str = Field(..., description="Mandatory database URI")

View File

@@ -1,57 +1,76 @@
import os
from typing import Any, Optional, Type
import requests
from bs4 import BeautifulSoup
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
"""Input for ScrapeElementFromWebsiteTool."""
pass
"""Input for ScrapeElementFromWebsiteTool."""
pass
class ScrapeElementFromWebsiteToolSchema(FixedScrapeElementFromWebsiteToolSchema):
"""Input for ScrapeElementFromWebsiteTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
"""Input for ScrapeElementFromWebsiteTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
css_element: str = Field(
...,
description="Mandatory css reference for element to scrape from the website",
)
class ScrapeElementFromWebsiteTool(BaseTool):
name: str = "Read a website content"
description: str = "A tool that can be used to read a website content."
args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema
website_url: Optional[str] = None
cookies: Optional[dict] = None
css_element: Optional[str] = None
headers: Optional[dict] = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Language': 'en-US,en;q=0.9',
'Referer': 'https://www.google.com/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Accept-Encoding': 'gzip, deflate, br'
}
def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if website_url is not None:
self.website_url = website_url
self.css_element = css_element
self.description = f"A tool that can be used to read {website_url}'s content."
self.args_schema = FixedScrapeElementFromWebsiteToolSchema
self._generate_description()
if cookies is not None:
self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
def _run(
self,
**kwargs: Any,
) -> Any:
website_url = kwargs.get('website_url', self.website_url)
css_element = kwargs.get('css_element', self.css_element)
page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {})
parsed = BeautifulSoup(page.content, "html.parser")
elements = parsed.select(css_element)
return "\n".join([element.get_text() for element in elements])
name: str = "Read a website content"
description: str = "A tool that can be used to read a website content."
args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema
website_url: Optional[str] = None
cookies: Optional[dict] = None
css_element: Optional[str] = None
headers: Optional[dict] = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"Accept-Language": "en-US,en;q=0.9",
"Referer": "https://www.google.com/",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Accept-Encoding": "gzip, deflate, br",
}
def __init__(
self,
website_url: Optional[str] = None,
cookies: Optional[dict] = None,
css_element: Optional[str] = None,
**kwargs,
):
super().__init__(**kwargs)
if website_url is not None:
self.website_url = website_url
self.css_element = css_element
self.description = (
f"A tool that can be used to read {website_url}'s content."
)
self.args_schema = FixedScrapeElementFromWebsiteToolSchema
self._generate_description()
if cookies is not None:
self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
def _run(
self,
**kwargs: Any,
) -> Any:
website_url = kwargs.get("website_url", self.website_url)
css_element = kwargs.get("css_element", self.css_element)
page = requests.get(
website_url,
headers=self.headers,
cookies=self.cookies if self.cookies else {},
)
parsed = BeautifulSoup(page.content, "html.parser")
elements = parsed.select(css_element)
return "\n".join([element.get_text() for element in elements])

View File

@@ -1,59 +1,73 @@
import os
from typing import Any, Optional, Type
import requests
from bs4 import BeautifulSoup
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..base_tool import BaseTool
class FixedScrapeWebsiteToolSchema(BaseModel):
"""Input for ScrapeWebsiteTool."""
pass
"""Input for ScrapeWebsiteTool."""
pass
class ScrapeWebsiteToolSchema(FixedScrapeWebsiteToolSchema):
"""Input for ScrapeWebsiteTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
"""Input for ScrapeWebsiteTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
class ScrapeWebsiteTool(BaseTool):
name: str = "Read website content"
description: str = "A tool that can be used to read a website content."
args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema
website_url: Optional[str] = None
cookies: Optional[dict] = None
headers: Optional[dict] = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Language': 'en-US,en;q=0.9',
'Referer': 'https://www.google.com/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
name: str = "Read website content"
description: str = "A tool that can be used to read a website content."
args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema
website_url: Optional[str] = None
cookies: Optional[dict] = None
headers: Optional[dict] = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"Accept-Language": "en-US,en;q=0.9",
"Referer": "https://www.google.com/",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
}
def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):
super().__init__(**kwargs)
if website_url is not None:
self.website_url = website_url
self.description = f"A tool that can be used to read {website_url}'s content."
self.args_schema = FixedScrapeWebsiteToolSchema
self._generate_description()
if cookies is not None:
self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
def __init__(
self,
website_url: Optional[str] = None,
cookies: Optional[dict] = None,
**kwargs,
):
super().__init__(**kwargs)
if website_url is not None:
self.website_url = website_url
self.description = (
f"A tool that can be used to read {website_url}'s content."
)
self.args_schema = FixedScrapeWebsiteToolSchema
self._generate_description()
if cookies is not None:
self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
def _run(
self,
**kwargs: Any,
) -> Any:
website_url = kwargs.get('website_url', self.website_url)
page = requests.get(
website_url,
timeout=15,
headers=self.headers,
cookies=self.cookies if self.cookies else {}
)
def _run(
self,
**kwargs: Any,
) -> Any:
website_url = kwargs.get("website_url", self.website_url)
page = requests.get(
website_url,
timeout=15,
headers=self.headers,
cookies=self.cookies if self.cookies else {},
)
page.encoding = page.apparent_encoding
parsed = BeautifulSoup(page.text, "html.parser")
page.encoding = page.apparent_encoding
parsed = BeautifulSoup(page.text, "html.parser")
text = parsed.get_text()
text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
return text
text = parsed.get_text()
text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
text = " ".join([i for i in text.split(" ") if i.strip() != ""])
return text
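A minimal usage sketch of the reformatted tool; the URL is an example:

from crewai_tools import ScrapeWebsiteTool

scraper = ScrapeWebsiteTool(website_url="https://example.com")
text = scraper.run()  # fetches the page, strips markup, collapses blank lines and runs of spaces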

View File

@@ -1,20 +1,31 @@
import logging
from typing import Any, Dict, Literal, Optional, Type
from pydantic import BaseModel, Field
from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
logger = logging.getLogger(__file__)
class ScrapflyScrapeWebsiteToolSchema(BaseModel):
url: str = Field(description="Webpage URL")
scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(default="markdown", description="Webpage extraction format")
scrape_config: Optional[Dict[str, Any]] = Field(default=None, description="Scrapfly request scrape config")
ignore_scrape_failures: Optional[bool] = Field(default=None, description="whether to ignore failures")
scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(
default="markdown", description="Webpage extraction format"
)
scrape_config: Optional[Dict[str, Any]] = Field(
default=None, description="Scrapfly request scrape config"
)
ignore_scrape_failures: Optional[bool] = Field(
default=None, description="whether to ignore failures"
)
class ScrapflyScrapeWebsiteTool(BaseTool):
name: str = "Scrapfly web scraping API tool"
description: str = "Scrape a webpage url using Scrapfly and return its content as markdown or text"
description: str = (
"Scrape a webpage url using Scrapfly and return its content as markdown or text"
)
args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema
api_key: str = None
scrapfly: Optional[Any] = None
@@ -29,7 +40,13 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
)
self.scrapfly = ScrapflyClient(key=api_key)
def _run(self, url: str, scrape_format: str = "markdown", scrape_config: Optional[Dict[str, Any]] = None, ignore_scrape_failures: Optional[bool] = None):
def _run(
self,
url: str,
scrape_format: str = "markdown",
scrape_config: Optional[Dict[str, Any]] = None,
ignore_scrape_failures: Optional[bool] = None,
):
from scrapfly import ScrapeApiResponse, ScrapeConfig
scrape_config = scrape_config if scrape_config is not None else {}
@@ -44,4 +61,3 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
return None
else:
raise e

View File

@@ -1,77 +1,94 @@
from typing import Optional, Type, Any
import time
from pydantic.v1 import BaseModel, Field
from typing import Any, Optional, Type
from bs4 import BeautifulSoup
from pydantic import BaseModel, Field
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from ..base_tool import BaseTool
class FixedSeleniumScrapingToolSchema(BaseModel):
"""Input for SeleniumScrapingTool."""
pass
"""Input for SeleniumScrapingTool."""
pass
class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema):
"""Input for SeleniumScrapingTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
"""Input for SeleniumScrapingTool."""
website_url: str = Field(..., description="Mandatory website url to read the file")
css_element: str = Field(
...,
description="Mandatory css reference for element to scrape from the website",
)
class SeleniumScrapingTool(BaseTool):
name: str = "Read a website content"
description: str = "A tool that can be used to read a website content."
args_schema: Type[BaseModel] = SeleniumScrapingToolSchema
website_url: Optional[str] = None
driver: Optional[Any] = webdriver.Chrome
cookie: Optional[dict] = None
wait_time: Optional[int] = 3
css_element: Optional[str] = None
name: str = "Read a website content"
description: str = "A tool that can be used to read a website content."
args_schema: Type[BaseModel] = SeleniumScrapingToolSchema
website_url: Optional[str] = None
driver: Optional[Any] = webdriver.Chrome
cookie: Optional[dict] = None
wait_time: Optional[int] = 3
css_element: Optional[str] = None
def __init__(self, website_url: Optional[str] = None, cookie: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
if cookie is not None:
self.cookie = cookie
def __init__(
self,
website_url: Optional[str] = None,
cookie: Optional[dict] = None,
css_element: Optional[str] = None,
**kwargs,
):
super().__init__(**kwargs)
if cookie is not None:
self.cookie = cookie
if css_element is not None:
self.css_element = css_element
if css_element is not None:
self.css_element = css_element
if website_url is not None:
self.website_url = website_url
self.description = f"A tool that can be used to read {website_url}'s content."
self.args_schema = FixedSeleniumScrapingToolSchema
if website_url is not None:
self.website_url = website_url
self.description = (
f"A tool that can be used to read {website_url}'s content."
)
self.args_schema = FixedSeleniumScrapingToolSchema
self._generate_description()
def _run(
self,
**kwargs: Any,
) -> Any:
website_url = kwargs.get('website_url', self.website_url)
css_element = kwargs.get('css_element', self.css_element)
driver = self._create_driver(website_url, self.cookie, self.wait_time)
self._generate_description()
content = []
if css_element is None or css_element.strip() == "":
body_text = driver.find_element(By.TAG_NAME, "body").text
content.append(body_text)
else:
for element in driver.find_elements(By.CSS_SELECTOR, css_element):
content.append(element.text)
driver.close()
return "\n".join(content)
def _run(
self,
**kwargs: Any,
) -> Any:
website_url = kwargs.get("website_url", self.website_url)
css_element = kwargs.get("css_element", self.css_element)
driver = self._create_driver(website_url, self.cookie, self.wait_time)
def _create_driver(self, url, cookie, wait_time):
options = Options()
options.add_argument("--headless")
driver = self.driver(options=options)
driver.get(url)
time.sleep(wait_time)
if cookie:
driver.add_cookie(cookie)
time.sleep(wait_time)
driver.get(url)
time.sleep(wait_time)
return driver
content = []
if css_element is None or css_element.strip() == "":
body_text = driver.find_element(By.TAG_NAME, "body").text
content.append(body_text)
else:
for element in driver.find_elements(By.CSS_SELECTOR, css_element):
content.append(element.text)
driver.close()
return "\n".join(content)
def close(self):
self.driver.close()
def _create_driver(self, url, cookie, wait_time):
options = Options()
options.add_argument("--headless")
driver = self.driver(options=options)
driver.get(url)
time.sleep(wait_time)
if cookie:
driver.add_cookie(cookie)
time.sleep(wait_time)
driver.get(url)
time.sleep(wait_time)
return driver
def close(self):
self.driver.close()

View File

@@ -1,80 +1,94 @@
import datetime
import os
import json
import requests
import os
from typing import Any, Optional, Type
import requests
from pydantic import BaseModel, Field
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
def _save_results_to_file(content: str) -> None:
"""Saves the search results to a file."""
filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
with open(filename, 'w') as file:
file.write(content)
print(f"Results saved to {filename}")
"""Saves the search results to a file."""
filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
with open(filename, "w") as file:
file.write(content)
print(f"Results saved to {filename}")
class SerperDevToolSchema(BaseModel):
"""Input for SerperDevTool."""
search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
"""Input for SerperDevTool."""
search_query: str = Field(
..., description="Mandatory search query you want to use to search the internet"
)
class SerperDevTool(BaseTool):
name: str = "Search the internet"
description: str = "A tool that can be used to search the internet with a search_query."
args_schema: Type[BaseModel] = SerperDevToolSchema
search_url: str = "https://google.serper.dev/search"
country: Optional[str] = ''
location: Optional[str] = ''
locale: Optional[str] = ''
n_results: int = 10
save_file: bool = False
name: str = "Search the internet"
description: str = (
"A tool that can be used to search the internet with a search_query."
)
args_schema: Type[BaseModel] = SerperDevToolSchema
search_url: str = "https://google.serper.dev/search"
country: Optional[str] = ""
location: Optional[str] = ""
locale: Optional[str] = ""
n_results: int = 10
save_file: bool = False
def _run(
self,
**kwargs: Any,
) -> Any:
def _run(
self,
**kwargs: Any,
) -> Any:
search_query = kwargs.get('search_query') or kwargs.get('query')
save_file = kwargs.get('save_file', self.save_file)
n_results = kwargs.get('n_results', self.n_results)
search_query = kwargs.get("search_query") or kwargs.get("query")
save_file = kwargs.get("save_file", self.save_file)
n_results = kwargs.get("n_results", self.n_results)
payload = { "q": search_query, "num": n_results }
payload = {"q": search_query, "num": n_results}
if self.country != '':
payload["gl"] = self.country
if self.location != '':
payload["location"] = self.location
if self.locale != '':
payload["hl"] = self.locale
if self.country != "":
payload["gl"] = self.country
if self.location != "":
payload["location"] = self.location
if self.locale != "":
payload["hl"] = self.locale
payload = json.dumps(payload)
payload = json.dumps(payload)
headers = {
'X-API-KEY': os.environ['SERPER_API_KEY'],
'content-type': 'application/json'
}
headers = {
"X-API-KEY": os.environ["SERPER_API_KEY"],
"content-type": "application/json",
}
response = requests.request("POST", self.search_url, headers=headers, data=payload)
results = response.json()
response = requests.request(
"POST", self.search_url, headers=headers, data=payload
)
results = response.json()
if 'organic' in results:
results = results['organic'][:self.n_results]
string = []
for result in results:
try:
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Snippet: {result['snippet']}",
"---"
]))
except KeyError:
continue
if "organic" in results:
results = results["organic"][: self.n_results]
string = []
for result in results:
try:
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Snippet: {result['snippet']}",
"---",
]
)
)
except KeyError:
continue
content = '\n'.join(string)
if save_file:
_save_results_to_file(content)
return f"\nSearch results: {content}\n"
else:
return results
content = "\n".join(string)
if save_file:
_save_results_to_file(content)
return f"\nSearch results: {content}\n"
else:
return results
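For reference, a usage sketch of the tool after the quoting cleanup, assuming SERPER_API_KEY is exported since _run reads it from the environment; the query string is made up:

import os

from crewai_tools import SerperDevTool

assert "SERPER_API_KEY" in os.environ  # required by _run
search = SerperDevTool(n_results=5)
print(search.run(search_query="pydantic v2 migration"))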

View File

@@ -1,19 +1,27 @@
import os
import requests
from typing import Any, Optional, Type
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
import requests
from pydantic import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyJobSearchToolSchema(BaseModel):
"""Input for Job Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch jobs postings.")
search_query: str = Field(
...,
description="Mandatory search query you want to use to fetch jobs postings.",
)
class SerplyJobSearchTool(RagTool):
name: str = "Job Search"
description: str = "A tool to perform to perform a job search in the US with a search_query."
description: str = (
"A tool to perform to perform a job search in the US with a search_query."
)
args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
request_url: str = "https://api.serply.io/v1/job/search/"
proxy_location: Optional[str] = "US"
@@ -23,20 +31,17 @@ class SerplyJobSearchTool(RagTool):
"""
headers: Optional[dict] = {}
def __init__(
self,
**kwargs
):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": self.proxy_location
"X-Proxy-Location": self.proxy_location,
}
def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
query_payload = {}
@@ -58,18 +63,22 @@ class SerplyJobSearchTool(RagTool):
string = []
for job in jobs:
try:
string.append('\n'.join([
f"Position: {job['position']}",
f"Employer: {job['employer']}",
f"Location: {job['location']}",
f"Link: {job['link']}",
f"""Highest: {', '.join([h for h in job['highlights']])}""",
f"Is Remote: {job['is_remote']}",
f"Is Hybrid: {job['is_remote']}",
"---"
]))
string.append(
"\n".join(
[
f"Position: {job['position']}",
f"Employer: {job['employer']}",
f"Location: {job['location']}",
f"Link: {job['link']}",
f"""Highest: {', '.join([h for h in job['highlights']])}""",
f"Is Remote: {job['is_remote']}",
f"Is Hybrid: {job['is_remote']}",
"---",
]
)
)
except KeyError:
continue
content = '\n'.join(string)
content = "\n".join(string)
return f"\nSearch results: {content}\n"

View File

@@ -1,14 +1,19 @@
import os
import requests
from typing import Any, Optional, Type
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
import requests
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
search_query: str = Field(
..., description="Mandatory search query you want to use to fetch news articles"
)
class SerplyNewsSearchTool(BaseTool):
@@ -21,15 +26,12 @@ class SerplyNewsSearchTool(BaseTool):
limit: Optional[int] = 10
def __init__(
self,
limit: Optional[int] = 10,
proxy_location: Optional[str] = "US",
**kwargs
self, limit: Optional[int] = 10, proxy_location: Optional[str] = "US", **kwargs
):
"""
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
proxy_location: (str): Where to get news, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
proxy_location: (str): Where to get news, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
@@ -37,12 +39,12 @@ class SerplyNewsSearchTool(BaseTool):
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}
def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
# build query parameters
query_payload = {}
@@ -58,24 +60,28 @@ class SerplyNewsSearchTool(BaseTool):
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "entries" in results:
results = results['entries']
results = results["entries"]
string = []
for result in results[:self.limit]:
for result in results[: self.limit]:
try:
# follow url
r = requests.get(result['link'])
final_link = r.history[-1].headers['Location']
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {final_link}",
f"Source: {result['source']['title']}",
f"Published: {result['published']}",
"---"
]))
r = requests.get(result["link"])
final_link = r.history[-1].headers["Location"]
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {final_link}",
f"Source: {result['source']['title']}",
f"Published: {result['published']}",
"---",
]
)
)
except KeyError:
continue
content = '\n'.join(string)
content = "\n".join(string)
return f"\nSearch results: {content}\n"
else:
return results

View File

@@ -1,36 +1,39 @@
import os
import requests
from typing import Any, Optional, Type
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
import requests
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
search_query: str = Field(
...,
description="Mandatory search query you want to use to fetch scholarly literature",
)
class SerplyScholarSearchTool(BaseTool):
name: str = "Scholar Search"
description: str = "A tool to perform scholarly literature search with a search_query."
description: str = (
"A tool to perform scholarly literature search with a search_query."
)
args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
search_url: str = "https://api.serply.io/v1/scholar/"
hl: Optional[str] = "us"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
proxy_location: Optional[str] = "US",
**kwargs
):
def __init__(self, hl: str = "us", proxy_location: Optional[str] = "US", **kwargs):
"""
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
proxy_location: (str): Specify the proxy location for the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
proxy_location: (str): Specify the proxy location for the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.hl = hl
@@ -38,16 +41,14 @@ class SerplyScholarSearchTool(BaseTool):
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}
def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
query_payload = {
"hl": self.hl
}
query_payload = {"hl": self.hl}
if "query" in kwargs:
query_payload["q"] = kwargs["query"]
@@ -67,20 +68,24 @@ class SerplyScholarSearchTool(BaseTool):
for article in articles:
try:
if "doc" in article:
link = article['doc']['link']
link = article["doc"]["link"]
else:
link = article['link']
authors = [author['name'] for author in article['author']['authors']]
string.append('\n'.join([
f"Title: {article['title']}",
f"Link: {link}",
f"Description: {article['description']}",
f"Cite: {article['cite']}",
f"Authors: {', '.join(authors)}",
"---"
]))
link = article["link"]
authors = [author["name"] for author in article["author"]["authors"]]
string.append(
"\n".join(
[
f"Title: {article['title']}",
f"Link: {link}",
f"Description: {article['description']}",
f"Cite: {article['cite']}",
f"Authors: {', '.join(authors)}",
"---",
]
)
)
except KeyError:
continue
content = '\n'.join(string)
content = "\n".join(string)
return f"\nSearch results: {content}\n"

View File

@@ -1,14 +1,19 @@
import os
import requests
from typing import Any, Optional, Type
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
import requests
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to Google search")
search_query: str = Field(
..., description="Mandatory search query you want to use to Google search"
)
class SerplyWebSearchTool(BaseTool):
@@ -24,21 +29,21 @@ class SerplyWebSearchTool(BaseTool):
headers: Optional[dict] = {}
def __init__(
self,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
**kwargs
self,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
**kwargs,
):
"""
param: query (str): The query to search for
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
param: query (str): The query to search for
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
@@ -50,18 +55,18 @@ class SerplyWebSearchTool(BaseTool):
self.query_payload = {
"num": limit,
"gl": proxy_location.upper(),
"hl": hl.lower()
"hl": hl.lower(),
}
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"X-User-Agent": device_type,
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}
def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
if "query" in kwargs:
self.query_payload["q"] = kwargs["query"]
@@ -74,20 +79,24 @@ class SerplyWebSearchTool(BaseTool):
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "results" in results:
results = results['results']
results = results["results"]
string = []
for result in results:
try:
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Description: {result['description'].strip()}",
"---"
]))
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Description: {result['description'].strip()}",
"---",
]
)
)
except KeyError:
continue
content = '\n'.join(string)
content = "\n".join(string)
return f"\nSearch results: {content}\n"
else:
return results

View File

@@ -1,48 +1,50 @@
import os
from typing import Any, Optional, Type
import requests
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyWebpageToMarkdownToolSchema(BaseModel):
"""Input for Serply Search."""
url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
url: str = Field(
...,
description="Mandatory url you want to use to fetch and convert to markdown",
)
class SerplyWebpageToMarkdownTool(RagTool):
name: str = "Webpage to Markdown"
description: str = "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
description: str = (
"A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
)
args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
request_url: str = "https://api.serply.io/v1/request"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}
def __init__(
self,
proxy_location: Optional[str] = "US",
**kwargs
):
def __init__(self, proxy_location: Optional[str] = "US", **kwargs):
"""
proxy_location: (str): Where to perform the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
proxy_location: (str): Where to perform the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}
def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
data = {
"url": kwargs["url"],
"method": "GET",
"response_type": "markdown"
}
response = requests.request("POST", self.request_url, headers=self.headers, json=data)
data = {"url": kwargs["url"], "method": "GET", "response_type": "markdown"}
response = requests.request(
"POST", self.request_url, headers=self.headers, json=data
)
return response.text

View File

@@ -1,21 +1,25 @@
from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from typing import Any, Dict, Literal, Optional, Type
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool
class SpiderToolSchema(BaseModel):
url: str = Field(description="Website URL")
params: Optional[Dict[str, Any]] = Field(
description="Set additional params. Options include:\n"
"- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
"- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
"- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
"- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
"- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
"- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
"- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
"- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
)
mode: Literal["scrape", "crawl"] = Field(
default="scrape",
description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set."
description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set.",
)
class SpiderTool(BaseTool):
name: str = "Spider scrape & crawl tool"
description: str = "Scrape & Crawl any url and return LLM-ready data."
@@ -26,11 +30,11 @@ class SpiderTool(BaseTool):
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from spider import Spider # type: ignore
from spider import Spider # type: ignore
except ImportError:
raise ImportError(
"`spider-client` package not found, please run `pip install spider-client`"
)
raise ImportError(
"`spider-client` package not found, please run `pip install spider-client`"
)
self.spider = Spider(api_key=api_key)
@@ -38,7 +42,7 @@ class SpiderTool(BaseTool):
self,
url: str,
params: Optional[Dict[str, Any]] = None,
mode: Optional[Literal["scrape", "crawl"]] = "scrape"
mode: Optional[Literal["scrape", "crawl"]] = "scrape",
):
if mode not in ["scrape", "crawl"]:
raise ValueError(
@@ -51,9 +55,7 @@ class SpiderTool(BaseTool):
else:
params = {"return_format": "markdown"}
action = (
self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
)
action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
spider_docs = action(url=url, params=params)
return spider_docs

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool

View File

@@ -2,9 +2,10 @@ import base64
from typing import Type
import requests
from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
from pydantic.v1 import BaseModel
from pydantic import BaseModel
from crewai_tools.tools.base_tool import BaseTool
class ImagePromptSchema(BaseModel):

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool
@@ -25,7 +25,9 @@ class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema):
class WebsiteSearchTool(RagTool):
name: str = "Search in a specific website"
description: str = "A tool that can be used to semantic search a query from a specific URL content."
description: str = (
"A tool that can be used to semantic search a query from a specific URL content."
)
args_schema: Type[BaseModel] = WebsiteSearchToolSchema
def __init__(self, website: Optional[str] = None, **kwargs):

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool
@@ -25,7 +25,9 @@ class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema):
class YoutubeChannelSearchTool(RagTool):
name: str = "Search a Youtube Channels content"
description: str = "A tool that can be used to semantic search a query from a Youtube Channels content."
description: str = (
"A tool that can be used to semantic search a query from a Youtube Channels content."
)
args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema
def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs):

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type
from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool
@@ -25,7 +25,9 @@ class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema):
class YoutubeVideoSearchTool(RagTool):
name: str = "Search a Youtube Video content"
description: str = "A tool that can be used to semantic search a query from a Youtube Video content."
description: str = (
"A tool that can be used to semantic search a query from a Youtube Video content."
)
args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema
def __init__(self, youtube_video_url: Optional[str] = None, **kwargs):