Merge pull request #101 from crewAIInc/brandon/cre-250-llamaindex-pydantic-v2

Pydantic v1 issues
Authored by Rip&Tear on 2024-09-13 21:53:32 +08:00; committed by GitHub.
39 changed files with 752 additions and 550 deletions

View File

@@ -3,11 +3,11 @@ from typing import Any, Callable, Optional, Type
 from langchain_core.tools import StructuredTool
 from pydantic import BaseModel, ConfigDict, Field, validator
-from pydantic.v1 import BaseModel as V1BaseModel
+from pydantic import BaseModel as PydanticBaseModel


 class BaseTool(BaseModel, ABC):
-    class _ArgsSchemaPlaceholder(V1BaseModel):
+    class _ArgsSchemaPlaceholder(PydanticBaseModel):
         pass

     model_config = ConfigDict()
@@ -16,7 +16,7 @@ class BaseTool(BaseModel, ABC):
     """The unique name of the tool that clearly communicates its purpose."""
     description: str
     """Used to tell the model how/when/why to use the tool."""
-    args_schema: Type[V1BaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
+    args_schema: Type[PydanticBaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
     """The schema for the arguments that the tool accepts."""
     description_updated: bool = False
     """Flag to check if the description has been updated."""
@@ -26,13 +26,15 @@ class BaseTool(BaseModel, ABC):
     """Flag to check if the tool should be the final agent answer."""

     @validator("args_schema", always=True, pre=True)
-    def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
+    def _default_args_schema(
+        cls, v: Type[PydanticBaseModel]
+    ) -> Type[PydanticBaseModel]:
         if not isinstance(v, cls._ArgsSchemaPlaceholder):
             return v

         return type(
             f"{cls.__name__}Schema",
-            (V1BaseModel,),
+            (PydanticBaseModel,),
             {
                 "__annotations__": {
                     k: v for k, v in cls._run.__annotations__.items() if k != "return"
@@ -75,7 +77,7 @@ class BaseTool(BaseModel, ABC):
         class_name = f"{self.__class__.__name__}Schema"
         self.args_schema = type(
             class_name,
-            (V1BaseModel,),
+            (PydanticBaseModel,),
             {
                 "__annotations__": {
                     k: v
@@ -127,7 +129,7 @@ def tool(*args):
     class_name = "".join(tool_name.split()).title()
     args_schema = type(
         class_name,
-        (V1BaseModel,),
+        (PydanticBaseModel,),
         {
             "__annotations__": {
                 k: v for k, v in f.__annotations__.items() if k != "return"
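The hunks above capture the whole migration in miniature: the separate pydantic.v1 compatibility import is dropped, and the dynamically built args schemas now inherit from the same v2 BaseModel as everything else. A minimal sketch of that dynamic-schema technique under plain pydantic v2 (the greet/GreetSchema names are hypothetical, chosen for illustration):

from pydantic import BaseModel


def greet(name: str, excited: bool = False) -> str:
    return f"Hello, {name}{'!' if excited else '.'}"


# Build a schema class from the function's annotations, as the diff
# does with type(); the "return" annotation is excluded. Three-arg
# type() delegates to pydantic's model metaclass here, so the result
# is a real pydantic model.
GreetSchema = type(
    "GreetSchema",
    (BaseModel,),
    {
        "__annotations__": {
            k: v for k, v in greet.__annotations__.items() if k != "return"
        }
    },
)

print(GreetSchema(name="Ada", excited=True))  # name='Ada' excited=True

Note that @validator("args_schema", always=True, pre=True) keeps working here only through pydantic v2's deprecated shim; the v2-native spelling would be @field_validator("args_schema", mode="before") paired with @classmethod.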

View File

@@ -1,13 +1,19 @@
-from typing import Optional, Any, Type
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool


 class BrowserbaseLoadToolSchema(BaseModel):
     url: str = Field(description="Website URL")


 class BrowserbaseLoadTool(BaseTool):
     name: str = "Browserbase web load tool"
-    description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
+    description: str = (
+        "Load webpages url in a headless browser using Browserbase and return the contents"
+    )
     args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
     api_key: Optional[str] = None
     project_id: Optional[str] = None

View File

@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

View File

@@ -3,8 +3,9 @@ import os
 from typing import List, Optional, Type

 import docker
+from pydantic import BaseModel, Field

 from crewai_tools.tools.base_tool import BaseTool
-from pydantic.v1 import BaseModel, Field


 class CodeInterpreterSchema(BaseModel):

View File

@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

View File

@@ -1,9 +1,10 @@
 import json
 from typing import Type

+from crewai_tools.tools.base_tool import BaseTool
 from openai import OpenAI
-from pydantic.v1 import BaseModel
-
-from crewai_tools.tools.base_tool import BaseTool
+from pydantic import BaseModel


 class ImagePromptSchema(BaseModel):

View File

@@ -1,38 +1,50 @@
 import os
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field
+
 from ..base_tool import BaseTool


 class FixedDirectoryReadToolSchema(BaseModel):
     """Input for DirectoryReadTool."""
-    pass
+
+    pass


 class DirectoryReadToolSchema(FixedDirectoryReadToolSchema):
     """Input for DirectoryReadTool."""
-    directory: str = Field(..., description="Mandatory directory to list content")
+
+    directory: str = Field(..., description="Mandatory directory to list content")


 class DirectoryReadTool(BaseTool):
     name: str = "List files in directory"
-    description: str = "A tool that can be used to recursively list a directory's content."
+    description: str = (
+        "A tool that can be used to recursively list a directory's content."
+    )
     args_schema: Type[BaseModel] = DirectoryReadToolSchema
     directory: Optional[str] = None

     def __init__(self, directory: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         if directory is not None:
             self.directory = directory
             self.description = f"A tool that can be used to list {directory}'s content."
             self.args_schema = FixedDirectoryReadToolSchema
             self._generate_description()

-    def _run(
-        self,
-        **kwargs: Any,
-    ) -> Any:
-        directory = kwargs.get('directory', self.directory)
-        if directory[-1] == "/":
-            directory = directory[:-1]
-        files_list = [f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}" for root, dirs, files in os.walk(directory) for filename in files]
-        files = "\n- ".join(files_list)
-        return f"File paths: \n-{files}"
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        directory = kwargs.get("directory", self.directory)
+        if directory[-1] == "/":
+            directory = directory[:-1]
+        files_list = [
+            f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}"
+            for root, dirs, files in os.walk(directory)
+            for filename in files
+        ]
+        files = "\n- ".join(files_list)
+        return f"File paths: \n-{files}"

View File

@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.loaders.directory_loader import DirectoryLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

View File

@@ -1,26 +1,32 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool


 class FixedDOCXSearchToolSchema(BaseModel):
     """Input for DOCXSearchTool."""
-    docx: Optional[str] = Field(..., description="Mandatory docx path you want to search")
+
+    docx: Optional[str] = Field(
+        ..., description="Mandatory docx path you want to search"
+    )
     search_query: str = Field(
         ...,
         description="Mandatory search query you want to use to search the DOCX's content",
     )


 class DOCXSearchToolSchema(FixedDOCXSearchToolSchema):
     """Input for DOCXSearchTool."""
+
     search_query: str = Field(
         ...,
         description="Mandatory search query you want to use to search the DOCX's content",
     )


 class DOCXSearchTool(RagTool):
     name: str = "Search a DOCX's content"
     description: str = (
@@ -56,9 +62,9 @@ class DOCXSearchTool(RagTool):
         self,
         **kwargs: Any,
     ) -> Any:
-        search_query = kwargs.get('search_query')
+        search_query = kwargs.get("search_query")
         if search_query is None:
-            search_query = kwargs.get('query')
+            search_query = kwargs.get("query")

         docx = kwargs.get("docx")
         if docx is not None:

View File

@@ -1,36 +1,49 @@
 import os
 from typing import Type
-from pydantic.v1 import BaseModel, Field
+
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool


 class EXABaseToolToolSchema(BaseModel):
     """Input for EXABaseTool."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
+
+    search_query: str = Field(
+        ..., description="Mandatory search query you want to use to search the internet"
+    )


 class EXABaseTool(BaseTool):
     name: str = "Search the internet"
-    description: str = "A tool that can be used to search the internet from a search_query"
+    description: str = (
+        "A tool that can be used to search the internet from a search_query"
+    )
     args_schema: Type[BaseModel] = EXABaseToolToolSchema
     search_url: str = "https://api.exa.ai/search"
     n_results: int = None
     headers: dict = {
         "accept": "application/json",
         "content-type": "application/json",
     }

     def _parse_results(self, results):
         stirng = []
         for result in results:
             try:
-                stirng.append('\n'.join([
-                    f"Title: {result['title']}",
-                    f"Score: {result['score']}",
-                    f"Url: {result['url']}",
-                    f"ID: {result['id']}",
-                    "---"
-                ]))
+                stirng.append(
+                    "\n".join(
+                        [
+                            f"Title: {result['title']}",
+                            f"Score: {result['score']}",
+                            f"Url: {result['url']}",
+                            f"ID: {result['id']}",
+                            "---",
+                        ]
+                    )
+                )
             except KeyError:
                 next
-        content = '\n'.join(stirng)
+        content = "\n".join(stirng)
         return f"\nSearch results: {content}\n"

View File

@@ -1,19 +1,20 @@
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field
+
 from ..base_tool import BaseTool


 class FixedFileReadToolSchema(BaseModel):
     """Input for FileReadTool."""

     pass


 class FileReadToolSchema(FixedFileReadToolSchema):
     """Input for FileReadTool."""
-    file_path: str = Field(
-        ...,
-        description="Mandatory file full path to read the file"
-    )
+
+    file_path: str = Field(..., description="Mandatory file full path to read the file")


 class FileReadTool(BaseTool):
@@ -22,11 +23,7 @@ class FileReadTool(BaseTool):
     args_schema: Type[BaseModel] = FileReadToolSchema
     file_path: Optional[str] = None

-    def __init__(
-        self,
-        file_path: Optional[str] = None,
-        **kwargs
-    ):
+    def __init__(self, file_path: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         if file_path is not None:
             self.file_path = file_path
@@ -39,8 +36,8 @@ class FileReadTool(BaseTool):
         **kwargs: Any,
     ) -> Any:
         try:
-            file_path = kwargs.get('file_path', self.file_path)
-            with open(file_path, 'r') as file:
+            file_path = kwargs.get("file_path", self.file_path)
+            with open(file_path, "r") as file:
                 return file.read()
         except Exception as e:
             return f"Fail to read the file {file_path}. Error: {e}"

View File

@@ -1,39 +1,46 @@
 import os
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel
+
 from ..base_tool import BaseTool


 class FileWriterToolInput(BaseModel):
     filename: str
     content: str
     directory: Optional[str] = None
     overwrite: bool = False


 class FileWriterTool(BaseTool):
     name: str = "File Writer Tool"
-    description: str = "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
+    description: str = (
+        "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
+    )
     args_schema: Type[BaseModel] = FileWriterToolInput

     def _run(self, **kwargs: Any) -> str:
         try:
             # Create the directory if it doesn't exist
-            if kwargs['directory'] and not os.path.exists(kwargs['directory']):
-                os.makedirs(kwargs['directory'])
+            if kwargs["directory"] and not os.path.exists(kwargs["directory"]):
+                os.makedirs(kwargs["directory"])

             # Construct the full path
-            filepath = os.path.join(kwargs['directory'] or '', kwargs['filename'])
+            filepath = os.path.join(kwargs["directory"] or "", kwargs["filename"])

             # Check if file exists and overwrite is not allowed
-            if os.path.exists(filepath) and not kwargs['overwrite']:
+            if os.path.exists(filepath) and not kwargs["overwrite"]:
                 return f"File {filepath} already exists and overwrite option was not passed."

             # Write content to the file
-            mode = 'w' if kwargs['overwrite'] else 'x'
+            mode = "w" if kwargs["overwrite"] else "x"
             with open(filepath, mode) as file:
-                file.write(kwargs['content'])
+                file.write(kwargs["content"])
             return f"Content successfully written to {filepath}"
         except FileExistsError:
-            return f"File {filepath} already exists and overwrite option was not passed."
+            return (
+                f"File {filepath} already exists and overwrite option was not passed."
+            )
         except Exception as e:
             return f"An error occurred while writing to the file: {str(e)}"

View File

@@ -1,11 +1,19 @@
-from typing import Optional, Any, Type, Dict, List
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, List, Optional, Type
+
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool


 class FirecrawlCrawlWebsiteToolSchema(BaseModel):
     url: str = Field(description="Website URL")
-    crawler_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for crawling")
-    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page")
+    crawler_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for crawling"
+    )
+    page_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for page"
+    )


 class FirecrawlCrawlWebsiteTool(BaseTool):
     name: str = "Firecrawl web crawl tool"
@@ -17,22 +25,24 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         try:
             from firecrawl import FirecrawlApp  # type: ignore
         except ImportError:
             raise ImportError(
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )
         self.firecrawl = FirecrawlApp(api_key=api_key)

-    def _run(self, url: str, crawler_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None):
-        if (crawler_options is None):
+    def _run(
+        self,
+        url: str,
+        crawler_options: Optional[Dict[str, Any]] = None,
+        page_options: Optional[Dict[str, Any]] = None,
+    ):
+        if crawler_options is None:
             crawler_options = {}
-        if (page_options is None):
+        if page_options is None:
             page_options = {}
-        options = {
-            "crawlerOptions": crawler_options,
-            "pageOptions": page_options
-        }
-        return self.firecrawl.crawl_url(url, options)
+        options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
+        return self.firecrawl.crawl_url(url, options)

View File

@@ -1,12 +1,23 @@
-from typing import Optional, Any, Type, Dict
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, Optional, Type
+
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool


 class FirecrawlScrapeWebsiteToolSchema(BaseModel):
     url: str = Field(description="Website URL")
-    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
-    extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
-    timeout: Optional[int] = Field(default=None, description="Timeout in milliseconds for the scraping operation. The default value is 30000.")
+    page_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for page scraping"
+    )
+    extractor_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for data extraction"
+    )
+    timeout: Optional[int] = Field(
+        default=None,
+        description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
+    )


 class FirecrawlScrapeWebsiteTool(BaseTool):
     name: str = "Firecrawl web scrape tool"
@@ -18,15 +29,21 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         try:
             from firecrawl import FirecrawlApp  # type: ignore
         except ImportError:
             raise ImportError(
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )
         self.firecrawl = FirecrawlApp(api_key=api_key)

-    def _run(self, url: str, page_options: Optional[Dict[str, Any]] = None, extractor_options: Optional[Dict[str, Any]] = None, timeout: Optional[int] = None):
+    def _run(
+        self,
+        url: str,
+        page_options: Optional[Dict[str, Any]] = None,
+        extractor_options: Optional[Dict[str, Any]] = None,
+        timeout: Optional[int] = None,
+    ):
         if page_options is None:
             page_options = {}
         if extractor_options is None:
@@ -37,6 +54,6 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
         options = {
             "pageOptions": page_options,
             "extractorOptions": extractor_options,
-            "timeout": timeout
+            "timeout": timeout,
         }
         return self.firecrawl.scrape_url(url, options)
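A usage sketch for the reformatted scrape tool; the API key and URL are placeholders, `firecrawl-py` must be installed, and the page option name follows Firecrawl's v0-era API, so treat it as an assumption:

from crewai_tools import FirecrawlScrapeWebsiteTool

# __init__ imports firecrawl lazily and raises a descriptive
# ImportError if `firecrawl-py` is missing.
tool = FirecrawlScrapeWebsiteTool(api_key="fc-...")

# _run folds the keyword arguments into a single camelCase options
# dict and forwards it to FirecrawlApp.scrape_url.
result = tool._run(
    url="https://example.com",
    page_options={"onlyMainContent": True},  # assumed option name
    timeout=30000,
)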

View File

@@ -1,11 +1,19 @@
-from typing import Optional, Any, Type, Dict, List
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, List, Optional, Type
+
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool


 class FirecrawlSearchToolSchema(BaseModel):
     query: str = Field(description="Search query")
-    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for result formatting")
-    search_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for searching")
+    page_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for result formatting"
+    )
+    search_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for searching"
+    )


 class FirecrawlSearchTool(BaseTool):
     name: str = "Firecrawl web search tool"
@@ -17,22 +25,24 @@ class FirecrawlSearchTool(BaseTool):
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         try:
             from firecrawl import FirecrawlApp  # type: ignore
         except ImportError:
             raise ImportError(
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )
         self.firecrawl = FirecrawlApp(api_key=api_key)

-    def _run(self, query: str, page_options: Optional[Dict[str, Any]] = None, result_options: Optional[Dict[str, Any]] = None):
-        if (page_options is None):
+    def _run(
+        self,
+        query: str,
+        page_options: Optional[Dict[str, Any]] = None,
+        result_options: Optional[Dict[str, Any]] = None,
+    ):
+        if page_options is None:
             page_options = {}
-        if (result_options is None):
+        if result_options is None:
             result_options = {}
-        options = {
-            "pageOptions": page_options,
-            "resultOptions": result_options
-        }
-        return self.firecrawl.search(query, options)
+        options = {"pageOptions": page_options, "resultOptions": result_options}
+        return self.firecrawl.search(query, options)

View File

@@ -1,7 +1,7 @@
 from typing import Any, List, Optional, Type

 from embedchain.loaders.github import GithubLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool
@@ -27,7 +27,9 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema):
 class GithubSearchTool(RagTool):
     name: str = "Search a github repo's content"
-    description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
+    description: str = (
+        "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
+    )
     summarize: bool = False
     gh_token: str
     args_schema: Type[BaseModel] = GithubSearchToolSchema

View File

@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

View File

@@ -1,50 +1,48 @@
-import os
-import json
-import requests
-from typing import Type, Any, cast, Optional
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type, cast
+
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool


 class LlamaIndexTool(BaseTool):
     """Tool to wrap LlamaIndex tools/query engines."""

     llama_index_tool: Any

     def _run(
         self,
         *args: Any,
         **kwargs: Any,
     ) -> Any:
         """Run tool."""
         from llama_index.core.tools import BaseTool as LlamaBaseTool

         tool = cast(LlamaBaseTool, self.llama_index_tool)
         return tool(*args, **kwargs)

     @classmethod
-    def from_tool(
-        cls,
-        tool: Any,
-        **kwargs: Any
-    ) -> "LlamaIndexTool":
+    def from_tool(cls, tool: Any, **kwargs: Any) -> "LlamaIndexTool":
         from llama_index.core.tools import BaseTool as LlamaBaseTool

         if not isinstance(tool, LlamaBaseTool):
             raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
         tool = cast(LlamaBaseTool, tool)

         if tool.metadata.fn_schema is None:
-            raise ValueError("The LlamaIndex tool does not have an fn_schema specified.")
+            raise ValueError(
+                "The LlamaIndex tool does not have an fn_schema specified."
+            )
         args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)

         return cls(
             name=tool.metadata.name,
             description=tool.metadata.description,
             args_schema=args_schema,
             llama_index_tool=tool,
-            **kwargs
+            **kwargs,
         )

     @classmethod
     def from_query_engine(
         cls,
@@ -52,7 +50,7 @@ class LlamaIndexTool(BaseTool):
         name: Optional[str] = None,
         description: Optional[str] = None,
         return_direct: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "LlamaIndexTool":
         from llama_index.core.query_engine import BaseQueryEngine
         from llama_index.core.tools import QueryEngineTool
@@ -60,10 +58,11 @@ class LlamaIndexTool(BaseTool):
         if not isinstance(query_engine, BaseQueryEngine):
             raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")

         # NOTE: by default the schema expects an `input` variable. However this
         # confuses crewAI so we are renaming to `query`.
         class QueryToolSchema(BaseModel):
             """Schema for query tool."""

             query: str = Field(..., description="Search query for the query tool.")

         # NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`
@@ -72,13 +71,9 @@ class LlamaIndexTool(BaseTool):
             name=name,
             description=description,
             return_direct=return_direct,
             resolve_input_errors=True,
         )
         # HACK: we are replacing the schema with our custom schema
         query_engine_tool.metadata.fn_schema = QueryToolSchema

-        return cls.from_tool(
-            query_engine_tool,
-            **kwargs
-        )
+        return cls.from_tool(query_engine_tool, **kwargs)
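Since this is the file the PR branch name calls out, a usage sketch may help; it assumes `llama-index` is installed, and the `./docs` path is a placeholder:

from crewai_tools import LlamaIndexTool
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Build a query engine from local documents.
documents = SimpleDirectoryReader("./docs").load_data()
query_engine = VectorStoreIndex.from_documents(documents).as_query_engine()

# from_query_engine swaps the default `input` argument for `query`,
# which crewAI agents handle better, then wraps it as a BaseTool.
tool = LlamaIndexTool.from_query_engine(
    query_engine,
    name="Docs QA",
    description="Answer questions about the local docs folder",
)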

View File

@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

View File

@@ -1,7 +1,7 @@
 from typing import Any, Type

 from embedchain.loaders.mysql import MySQLLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool
@@ -17,7 +17,9 @@ class MySQLSearchToolSchema(BaseModel):
 class MySQLSearchTool(RagTool):
     name: str = "Search a database's table content"
-    description: str = "A tool that can be used to semantic search a query from a database table's content."
+    description: str = (
+        "A tool that can be used to semantic search a query from a database table's content."
+    )
     args_schema: Type[BaseModel] = MySQLSearchToolSchema
     db_uri: str = Field(..., description="Mandatory database URI")

View File

@@ -1,8 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic import model_validator
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator

 from ..rag.rag_tool import RagTool

View File

@@ -1,7 +1,7 @@
 from typing import Any, Type

 from embedchain.loaders.postgres import PostgresLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool
@@ -17,7 +17,9 @@ class PGSearchToolSchema(BaseModel):
 class PGSearchTool(RagTool):
     name: str = "Search a database's table content"
-    description: str = "A tool that can be used to semantic search a query from a database table's content."
+    description: str = (
+        "A tool that can be used to semantic search a query from a database table's content."
+    )
     args_schema: Type[BaseModel] = PGSearchToolSchema
     db_uri: str = Field(..., description="Mandatory database URI")

View File

@@ -1,57 +1,76 @@
 import os
+from typing import Any, Optional, Type

 import requests
 from bs4 import BeautifulSoup
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
+
 from ..base_tool import BaseTool


 class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
     """Input for ScrapeElementFromWebsiteTool."""
-    pass
+
+    pass


 class ScrapeElementFromWebsiteToolSchema(FixedScrapeElementFromWebsiteToolSchema):
     """Input for ScrapeElementFromWebsiteTool."""
-    website_url: str = Field(..., description="Mandatory website url to read the file")
-    css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
+
+    website_url: str = Field(..., description="Mandatory website url to read the file")
+    css_element: str = Field(
+        ...,
+        description="Mandatory css reference for element to scrape from the website",
+    )


 class ScrapeElementFromWebsiteTool(BaseTool):
     name: str = "Read a website content"
     description: str = "A tool that can be used to read a website content."
     args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema
     website_url: Optional[str] = None
     cookies: Optional[dict] = None
     css_element: Optional[str] = None
     headers: Optional[dict] = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
-        'Accept-Language': 'en-US,en;q=0.9',
-        'Referer': 'https://www.google.com/',
-        'Connection': 'keep-alive',
-        'Upgrade-Insecure-Requests': '1',
-        'Accept-Encoding': 'gzip, deflate, br'
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Referer": "https://www.google.com/",
+        "Connection": "keep-alive",
+        "Upgrade-Insecure-Requests": "1",
+        "Accept-Encoding": "gzip, deflate, br",
     }

-    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
-        super().__init__(**kwargs)
-        if website_url is not None:
-            self.website_url = website_url
-            self.css_element = css_element
-            self.description = f"A tool that can be used to read {website_url}'s content."
-            self.args_schema = FixedScrapeElementFromWebsiteToolSchema
-            self._generate_description()
-        if cookies is not None:
-            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
+    def __init__(
+        self,
+        website_url: Optional[str] = None,
+        cookies: Optional[dict] = None,
+        css_element: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        if website_url is not None:
+            self.website_url = website_url
+            self.css_element = css_element
+            self.description = (
+                f"A tool that can be used to read {website_url}'s content."
+            )
+            self.args_schema = FixedScrapeElementFromWebsiteToolSchema
+            self._generate_description()
+        if cookies is not None:
+            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

-    def _run(
-        self,
-        **kwargs: Any,
-    ) -> Any:
-        website_url = kwargs.get('website_url', self.website_url)
-        css_element = kwargs.get('css_element', self.css_element)
-        page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {})
-        parsed = BeautifulSoup(page.content, "html.parser")
-        elements = parsed.select(css_element)
-        return "\n".join([element.get_text() for element in elements])
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        website_url = kwargs.get("website_url", self.website_url)
+        css_element = kwargs.get("css_element", self.css_element)
+        page = requests.get(
+            website_url,
+            headers=self.headers,
+            cookies=self.cookies if self.cookies else {},
+        )
+        parsed = BeautifulSoup(page.content, "html.parser")
+        elements = parsed.select(css_element)
+        return "\n".join([element.get_text() for element in elements])

View File

@@ -1,59 +1,73 @@
 import os
+from typing import Any, Optional, Type

 import requests
 from bs4 import BeautifulSoup
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
+
 from ..base_tool import BaseTool


 class FixedScrapeWebsiteToolSchema(BaseModel):
     """Input for ScrapeWebsiteTool."""
-    pass
+
+    pass


 class ScrapeWebsiteToolSchema(FixedScrapeWebsiteToolSchema):
     """Input for ScrapeWebsiteTool."""
-    website_url: str = Field(..., description="Mandatory website url to read the file")
+
+    website_url: str = Field(..., description="Mandatory website url to read the file")


 class ScrapeWebsiteTool(BaseTool):
     name: str = "Read website content"
     description: str = "A tool that can be used to read a website content."
     args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema
     website_url: Optional[str] = None
     cookies: Optional[dict] = None
     headers: Optional[dict] = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
-        'Accept-Language': 'en-US,en;q=0.9',
-        'Referer': 'https://www.google.com/',
-        'Connection': 'keep-alive',
-        'Upgrade-Insecure-Requests': '1'
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Referer": "https://www.google.com/",
+        "Connection": "keep-alive",
+        "Upgrade-Insecure-Requests": "1",
     }

-    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):
-        super().__init__(**kwargs)
-        if website_url is not None:
-            self.website_url = website_url
-            self.description = f"A tool that can be used to read {website_url}'s content."
-            self.args_schema = FixedScrapeWebsiteToolSchema
-            self._generate_description()
-        if cookies is not None:
-            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
+    def __init__(
+        self,
+        website_url: Optional[str] = None,
+        cookies: Optional[dict] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        if website_url is not None:
+            self.website_url = website_url
+            self.description = (
+                f"A tool that can be used to read {website_url}'s content."
+            )
+            self.args_schema = FixedScrapeWebsiteToolSchema
+            self._generate_description()
+        if cookies is not None:
+            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
-        website_url = kwargs.get('website_url', self.website_url)
+        website_url = kwargs.get("website_url", self.website_url)
         page = requests.get(
             website_url,
             timeout=15,
             headers=self.headers,
-            cookies=self.cookies if self.cookies else {}
+            cookies=self.cookies if self.cookies else {},
         )
         page.encoding = page.apparent_encoding
         parsed = BeautifulSoup(page.text, "html.parser")

         text = parsed.get_text()
-        text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
-        text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
+        text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
+        text = " ".join([i for i in text.split(" ") if i.strip() != ""])
         return text
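The two split/join passes at the end are a cheap whitespace normalizer for `get_text()` output: the first drops blank lines, the second collapses runs of spaces. In isolation:

raw = "Title\n\n\n   some    body   text\n\n"

step1 = "\n".join(i for i in raw.split("\n") if i.strip() != "")
step2 = " ".join(i for i in step1.split(" ") if i.strip() != "")
print(repr(step2))  # 'Title\n some body text'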

View File

@@ -1,20 +1,31 @@
 import logging
+from typing import Any, Dict, Literal, Optional, Type

-from typing import Optional, Any, Type, Dict, Literal
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool

 logger = logging.getLogger(__file__)


 class ScrapflyScrapeWebsiteToolSchema(BaseModel):
     url: str = Field(description="Webpage URL")
-    scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(default="markdown", description="Webpage extraction format")
-    scrape_config: Optional[Dict[str, Any]] = Field(default=None, description="Scrapfly request scrape config")
-    ignore_scrape_failures: Optional[bool] = Field(default=None, description="whether to ignore failures")
+    scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(
+        default="markdown", description="Webpage extraction format"
+    )
+    scrape_config: Optional[Dict[str, Any]] = Field(
+        default=None, description="Scrapfly request scrape config"
+    )
+    ignore_scrape_failures: Optional[bool] = Field(
+        default=None, description="whether to ignore failures"
+    )


 class ScrapflyScrapeWebsiteTool(BaseTool):
     name: str = "Scrapfly web scraping API tool"
-    description: str = "Scrape a webpage url using Scrapfly and return its content as markdown or text"
+    description: str = (
+        "Scrape a webpage url using Scrapfly and return its content as markdown or text"
+    )
     args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema
     api_key: str = None
     scrapfly: Optional[Any] = None
@@ -29,7 +40,13 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
             )
         self.scrapfly = ScrapflyClient(key=api_key)

-    def _run(self, url: str, scrape_format: str = "markdown", scrape_config: Optional[Dict[str, Any]] = None, ignore_scrape_failures: Optional[bool] = None):
+    def _run(
+        self,
+        url: str,
+        scrape_format: str = "markdown",
+        scrape_config: Optional[Dict[str, Any]] = None,
+        ignore_scrape_failures: Optional[bool] = None,
+    ):
         from scrapfly import ScrapeApiResponse, ScrapeConfig

         scrape_config = scrape_config if scrape_config is not None else {}
@@ -44,4 +61,3 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
                 return None
             else:
                 raise e
-

View File

@@ -1,77 +1,94 @@
-from typing import Optional, Type, Any
 import time
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
 from bs4 import BeautifulSoup
+from pydantic import BaseModel, Field
 from selenium import webdriver
-from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+
 from ..base_tool import BaseTool


 class FixedSeleniumScrapingToolSchema(BaseModel):
     """Input for SeleniumScrapingTool."""
-    pass
+
+    pass


 class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema):
     """Input for SeleniumScrapingTool."""
-    website_url: str = Field(..., description="Mandatory website url to read the file")
-    css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
+
+    website_url: str = Field(..., description="Mandatory website url to read the file")
+    css_element: str = Field(
+        ...,
+        description="Mandatory css reference for element to scrape from the website",
+    )


 class SeleniumScrapingTool(BaseTool):
     name: str = "Read a website content"
     description: str = "A tool that can be used to read a website content."
     args_schema: Type[BaseModel] = SeleniumScrapingToolSchema
     website_url: Optional[str] = None
     driver: Optional[Any] = webdriver.Chrome
     cookie: Optional[dict] = None
     wait_time: Optional[int] = 3
     css_element: Optional[str] = None

-    def __init__(self, website_url: Optional[str] = None, cookie: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
-        super().__init__(**kwargs)
-        if cookie is not None:
-            self.cookie = cookie
+    def __init__(
+        self,
+        website_url: Optional[str] = None,
+        cookie: Optional[dict] = None,
+        css_element: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        if cookie is not None:
+            self.cookie = cookie

         if css_element is not None:
             self.css_element = css_element

         if website_url is not None:
             self.website_url = website_url
-            self.description = f"A tool that can be used to read {website_url}'s content."
-            self.args_schema = FixedSeleniumScrapingToolSchema
+            self.description = (
+                f"A tool that can be used to read {website_url}'s content."
+            )
+            self.args_schema = FixedSeleniumScrapingToolSchema
             self._generate_description()

-    def _run(
-        self,
-        **kwargs: Any,
-    ) -> Any:
-        website_url = kwargs.get('website_url', self.website_url)
-        css_element = kwargs.get('css_element', self.css_element)
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        website_url = kwargs.get("website_url", self.website_url)
+        css_element = kwargs.get("css_element", self.css_element)
         driver = self._create_driver(website_url, self.cookie, self.wait_time)

         content = []
         if css_element is None or css_element.strip() == "":
             body_text = driver.find_element(By.TAG_NAME, "body").text
             content.append(body_text)
         else:
             for element in driver.find_elements(By.CSS_SELECTOR, css_element):
                 content.append(element.text)
         driver.close()
         return "\n".join(content)

     def _create_driver(self, url, cookie, wait_time):
         options = Options()
         options.add_argument("--headless")
         driver = self.driver(options=options)
         driver.get(url)
         time.sleep(wait_time)
         if cookie:
             driver.add_cookie(cookie)
             time.sleep(wait_time)
             driver.get(url)
             time.sleep(wait_time)
         return driver

     def close(self):
         self.driver.close()
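One subtlety the reformat preserves: `driver` defaults to the `webdriver.Chrome` class itself rather than an instance, so `_create_driver` calls `self.driver(options=options)` to build a fresh browser per run (which also means `close()` operates on the class, a pre-existing quirk the PR does not touch). A minimal sketch of the class-as-factory pattern, with hypothetical names and no Selenium dependency:

class Connection:
    def __init__(self, options=None):
        self.options = options or {}


class Pool:
    # Store the class as a factory, not an instance.
    factory = Connection

    def create(self):
        # Instantiate on demand, like _create_driver does.
        return self.factory(options={"timeout": 15})


conn = Pool().create()
print(type(conn).__name__)  # Connection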

View File

@@ -1,80 +1,94 @@
 import datetime
-import os
 import json
-import requests
+import os
+from typing import Any, Optional, Type

-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+import requests
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool


 def _save_results_to_file(content: str) -> None:
     """Saves the search results to a file."""
     filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
-    with open(filename, 'w') as file:
+    with open(filename, "w") as file:
         file.write(content)
     print(f"Results saved to {filename}")


 class SerperDevToolSchema(BaseModel):
     """Input for SerperDevTool."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
+
+    search_query: str = Field(
+        ..., description="Mandatory search query you want to use to search the internet"
+    )


 class SerperDevTool(BaseTool):
     name: str = "Search the internet"
-    description: str = "A tool that can be used to search the internet with a search_query."
+    description: str = (
+        "A tool that can be used to search the internet with a search_query."
+    )
     args_schema: Type[BaseModel] = SerperDevToolSchema
     search_url: str = "https://google.serper.dev/search"
-    country: Optional[str] = ''
-    location: Optional[str] = ''
-    locale: Optional[str] = ''
+    country: Optional[str] = ""
+    location: Optional[str] = ""
+    locale: Optional[str] = ""
     n_results: int = 10
     save_file: bool = False

     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
-        search_query = kwargs.get('search_query') or kwargs.get('query')
-        save_file = kwargs.get('save_file', self.save_file)
-        n_results = kwargs.get('n_results', self.n_results)
+        search_query = kwargs.get("search_query") or kwargs.get("query")
+        save_file = kwargs.get("save_file", self.save_file)
+        n_results = kwargs.get("n_results", self.n_results)

-        payload = { "q": search_query, "num": n_results }
+        payload = {"q": search_query, "num": n_results}

-        if self.country != '':
+        if self.country != "":
             payload["gl"] = self.country
-        if self.location != '':
+        if self.location != "":
             payload["location"] = self.location
-        if self.locale != '':
+        if self.locale != "":
             payload["hl"] = self.locale

         payload = json.dumps(payload)

         headers = {
-            'X-API-KEY': os.environ['SERPER_API_KEY'],
-            'content-type': 'application/json'
+            "X-API-KEY": os.environ["SERPER_API_KEY"],
+            "content-type": "application/json",
         }

-        response = requests.request("POST", self.search_url, headers=headers, data=payload)
+        response = requests.request(
+            "POST", self.search_url, headers=headers, data=payload
+        )
         results = response.json()

-        if 'organic' in results:
-            results = results['organic'][:self.n_results]
+        if "organic" in results:
+            results = results["organic"][: self.n_results]
             string = []
             for result in results:
                 try:
-                    string.append('\n'.join([
-                        f"Title: {result['title']}",
-                        f"Link: {result['link']}",
-                        f"Snippet: {result['snippet']}",
-                        "---"
-                    ]))
+                    string.append(
+                        "\n".join(
+                            [
+                                f"Title: {result['title']}",
+                                f"Link: {result['link']}",
+                                f"Snippet: {result['snippet']}",
+                                "---",
+                            ]
+                        )
+                    )
                 except KeyError:
                     continue

-            content = '\n'.join(string)
+            content = "\n".join(string)
             if save_file:
                 _save_results_to_file(content)
             return f"\nSearch results: {content}\n"
         else:
             return results
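A usage sketch for the reformatted tool; it assumes the top-level `crewai_tools` export, and SERPER_API_KEY must hold a real Serper key (the value below is a placeholder):

import os

from crewai_tools import SerperDevTool

os.environ["SERPER_API_KEY"] = "your-serper-key"  # placeholder

# n_results caps how many organic hits are formatted into the
# "Title / Link / Snippet" blocks the agent sees.
tool = SerperDevTool(n_results=5)
print(tool._run(search_query="pydantic v2 migration guide"))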

View File

@@ -1,19 +1,27 @@
 import os
-import requests
+from typing import Any, Optional, Type
 from urllib.parse import urlencode
-from typing import Type, Any, Optional
-from pydantic.v1 import BaseModel, Field
+
+import requests
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.rag.rag_tool import RagTool


 class SerplyJobSearchToolSchema(BaseModel):
     """Input for Job Search."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to fetch jobs postings.")
+
+    search_query: str = Field(
+        ...,
+        description="Mandatory search query you want to use to fetch jobs postings.",
+    )


 class SerplyJobSearchTool(RagTool):
     name: str = "Job Search"
-    description: str = "A tool to perform to perform a job search in the US with a search_query."
+    description: str = (
+        "A tool to perform to perform a job search in the US with a search_query."
+    )
     args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
     request_url: str = "https://api.serply.io/v1/job/search/"
     proxy_location: Optional[str] = "US"
@@ -23,20 +31,17 @@ class SerplyJobSearchTool(RagTool):
     """
     headers: Optional[dict] = {}

-    def __init__(
-        self,
-        **kwargs
-    ):
+    def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.headers = {
             "X-API-KEY": os.environ["SERPLY_API_KEY"],
             "User-Agent": "crew-tools",
-            "X-Proxy-Location": self.proxy_location
+            "X-Proxy-Location": self.proxy_location,
         }

     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
         query_payload = {}
@@ -58,18 +63,22 @@ class SerplyJobSearchTool(RagTool):
         string = []
         for job in jobs:
             try:
-                string.append('\n'.join([
-                    f"Position: {job['position']}",
-                    f"Employer: {job['employer']}",
-                    f"Location: {job['location']}",
-                    f"Link: {job['link']}",
-                    f"""Highest: {', '.join([h for h in job['highlights']])}""",
-                    f"Is Remote: {job['is_remote']}",
-                    f"Is Hybrid: {job['is_remote']}",
-                    "---"
-                ]))
+                string.append(
+                    "\n".join(
+                        [
+                            f"Position: {job['position']}",
+                            f"Employer: {job['employer']}",
+                            f"Location: {job['location']}",
+                            f"Link: {job['link']}",
+                            f"""Highest: {', '.join([h for h in job['highlights']])}""",
+                            f"Is Remote: {job['is_remote']}",
+                            f"Is Hybrid: {job['is_remote']}",
+                            "---",
+                        ]
+                    )
+                )
             except KeyError:
                 continue

-        content = '\n'.join(string)
+        content = "\n".join(string)
         return f"\nSearch results: {content}\n"

View File

@@ -1,14 +1,19 @@
import os import os
import requests from typing import Any, Optional, Type
from urllib.parse import urlencode from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field import requests
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool from crewai_tools.tools.base_tool import BaseTool
class SerplyNewsSearchToolSchema(BaseModel): class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search.""" """Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
search_query: str = Field(
..., description="Mandatory search query you want to use to fetch news articles"
)
class SerplyNewsSearchTool(BaseTool): class SerplyNewsSearchTool(BaseTool):
@@ -21,15 +26,12 @@ class SerplyNewsSearchTool(BaseTool):
limit: Optional[int] = 10 limit: Optional[int] = 10
def __init__( def __init__(
self, self, limit: Optional[int] = 10, proxy_location: Optional[str] = "US", **kwargs
limit: Optional[int] = 10,
proxy_location: Optional[str] = "US",
**kwargs
): ):
""" """
param: limit (int): The maximum number of results to return [10-100, defaults to 10] param: limit (int): The maximum number of results to return [10-100, defaults to 10]
proxy_location: (str): Where to get news from, for country-specific results. proxy_location: (str): Where to get news from, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US) ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
self.limit = limit self.limit = limit
@@ -37,12 +39,12 @@ class SerplyNewsSearchTool(BaseTool):
self.headers = { self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"], "X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools", "User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location "X-Proxy-Location": proxy_location,
} }
def _run( def _run(
self, self,
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
# build query parameters # build query parameters
query_payload = {} query_payload = {}
@@ -58,24 +60,28 @@ class SerplyNewsSearchTool(BaseTool):
response = requests.request("GET", url, headers=self.headers) response = requests.request("GET", url, headers=self.headers)
results = response.json() results = response.json()
if "entries" in results: if "entries" in results:
results = results['entries'] results = results["entries"]
string = [] string = []
for result in results[:self.limit]: for result in results[: self.limit]:
try: try:
# follow url # follow url
r = requests.get(result['link']) r = requests.get(result["link"])
final_link = r.history[-1].headers['Location'] final_link = r.history[-1].headers["Location"]
string.append('\n'.join([ string.append(
f"Title: {result['title']}", "\n".join(
f"Link: {final_link}", [
f"Source: {result['source']['title']}", f"Title: {result['title']}",
f"Published: {result['published']}", f"Link: {final_link}",
"---" f"Source: {result['source']['title']}",
])) f"Published: {result['published']}",
"---",
]
)
)
except KeyError: except KeyError:
continue continue
content = '\n'.join(string) content = "\n".join(string)
return f"\nSearch results: {content}\n" return f"\nSearch results: {content}\n"
else: else:
return results return results
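
A sketch of the constructor parameters documented above (limit between 10 and 100, proxy_location picking the country edition of the news); the top-level crewai_tools export and run() dispatch are assumptions, and the key is a placeholder:

import os

from crewai_tools import SerplyNewsSearchTool

os.environ["SERPLY_API_KEY"] = "<your-serply-key>"  # placeholder

news_search = SerplyNewsSearchTool(limit=25, proxy_location="GB")
print(news_search.run(search_query="semiconductor supply chain"))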

View File

@@ -1,36 +1,39 @@
import os import os
import requests from typing import Any, Optional, Type
from urllib.parse import urlencode from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field import requests
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool from crewai_tools.tools.base_tool import BaseTool
class SerplyScholarSearchToolSchema(BaseModel): class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search.""" """Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
search_query: str = Field(
...,
description="Mandatory search query you want to use to fetch scholarly literature",
)
class SerplyScholarSearchTool(BaseTool): class SerplyScholarSearchTool(BaseTool):
name: str = "Scholar Search" name: str = "Scholar Search"
description: str = "A tool to perform scholarly literature search with a search_query." description: str = (
"A tool to perform scholarly literature search with a search_query."
)
args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
search_url: str = "https://api.serply.io/v1/scholar/" search_url: str = "https://api.serply.io/v1/scholar/"
hl: Optional[str] = "us" hl: Optional[str] = "us"
proxy_location: Optional[str] = "US" proxy_location: Optional[str] = "US"
headers: Optional[dict] = {} headers: Optional[dict] = {}
def __init__( def __init__(self, hl: str = "us", proxy_location: Optional[str] = "US", **kwargs):
self,
hl: str = "us",
proxy_location: Optional[str] = "US",
**kwargs
):
""" """
param: hl (str): host language code to display results in param: hl (str): host language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages) (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
proxy_location: (str): Specify the proxy location for the search, for country-specific results. proxy_location: (str): Specify the proxy location for the search, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US) ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
self.hl = hl self.hl = hl
@@ -38,16 +41,14 @@ class SerplyScholarSearchTool(BaseTool):
self.headers = { self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"], "X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools", "User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location "X-Proxy-Location": proxy_location,
} }
def _run( def _run(
self, self,
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
query_payload = { query_payload = {"hl": self.hl}
"hl": self.hl
}
if "query" in kwargs: if "query" in kwargs:
query_payload["q"] = kwargs["query"] query_payload["q"] = kwargs["query"]
@@ -67,20 +68,24 @@ class SerplyScholarSearchTool(BaseTool):
for article in articles: for article in articles:
try: try:
if "doc" in article: if "doc" in article:
link = article['doc']['link'] link = article["doc"]["link"]
else: else:
link = article['link'] link = article["link"]
authors = [author['name'] for author in article['author']['authors']] authors = [author["name"] for author in article["author"]["authors"]]
string.append('\n'.join([ string.append(
f"Title: {article['title']}", "\n".join(
f"Link: {link}", [
f"Description: {article['description']}", f"Title: {article['title']}",
f"Cite: {article['cite']}", f"Link: {link}",
f"Authors: {', '.join(authors)}", f"Description: {article['description']}",
"---" f"Cite: {article['cite']}",
])) f"Authors: {', '.join(authors)}",
"---",
]
)
)
except KeyError: except KeyError:
continue continue
content = '\n'.join(string) content = "\n".join(string)
return f"\nSearch results: {content}\n" return f"\nSearch results: {content}\n"

View File

@@ -1,14 +1,19 @@
import os import os
import requests from typing import Any, Optional, Type
from urllib.parse import urlencode from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field import requests
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool from crewai_tools.tools.base_tool import BaseTool
class SerplyWebSearchToolSchema(BaseModel): class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search.""" """Input for Serply Web Search."""
search_query: str = Field(..., description="Mandatory search query you want to use for Google search")
search_query: str = Field(
..., description="Mandatory search query you want to use for Google search"
)
class SerplyWebSearchTool(BaseTool): class SerplyWebSearchTool(BaseTool):
@@ -24,21 +29,21 @@ class SerplyWebSearchTool(BaseTool):
headers: Optional[dict] = {} headers: Optional[dict] = {}
def __init__( def __init__(
self, self,
hl: str = "us", hl: str = "us",
limit: int = 10, limit: int = 10,
device_type: str = "desktop", device_type: str = "desktop",
proxy_location: str = "US", proxy_location: str = "US",
**kwargs **kwargs,
): ):
""" """
param: query (str): The query to search for param: query (str): The query to search for
param: hl (str): host language code to display results in param: hl (str): host language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages) (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10] param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop) param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results. proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US) ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
@@ -50,18 +55,18 @@ class SerplyWebSearchTool(BaseTool):
self.query_payload = { self.query_payload = {
"num": limit, "num": limit,
"gl": proxy_location.upper(), "gl": proxy_location.upper(),
"hl": hl.lower() "hl": hl.lower(),
} }
self.headers = { self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"], "X-API-KEY": os.environ["SERPLY_API_KEY"],
"X-User-Agent": device_type, "X-User-Agent": device_type,
"User-Agent": "crew-tools", "User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location "X-Proxy-Location": proxy_location,
} }
def _run( def _run(
self, self,
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
if "query" in kwargs: if "query" in kwargs:
self.query_payload["q"] = kwargs["query"] self.query_payload["q"] = kwargs["query"]
@@ -74,20 +79,24 @@ class SerplyWebSearchTool(BaseTool):
response = requests.request("GET", url, headers=self.headers) response = requests.request("GET", url, headers=self.headers)
results = response.json() results = response.json()
if "results" in results: if "results" in results:
results = results['results'] results = results["results"]
string = [] string = []
for result in results: for result in results:
try: try:
string.append('\n'.join([ string.append(
f"Title: {result['title']}", "\n".join(
f"Link: {result['link']}", [
f"Description: {result['description'].strip()}", f"Title: {result['title']}",
"---" f"Link: {result['link']}",
])) f"Description: {result['description'].strip()}",
"---",
]
)
)
except KeyError: except KeyError:
continue continue
content = '\n'.join(string) content = "\n".join(string)
return f"\nSearch results: {content}\n" return f"\nSearch results: {content}\n"
else: else:
return results return results
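
A sketch of the web search constructor documented above (limit in [10, 100], device_type switching between desktop and mobile results); the export, run() dispatch, and key are assumptions:

import os

from crewai_tools import SerplyWebSearchTool

os.environ["SERPLY_API_KEY"] = "<your-serply-key>"  # placeholder

web_search = SerplyWebSearchTool(limit=20, device_type="mobile", proxy_location="CA")
print(web_search.run(search_query="pydantic v2 migration guide"))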

View File

@@ -1,48 +1,50 @@
import os import os
from typing import Any, Optional, Type
import requests import requests
from typing import Type, Any, Optional from pydantic import BaseModel, Field
from pydantic.v1 import BaseModel, Field
from crewai_tools.tools.rag.rag_tool import RagTool from crewai_tools.tools.rag.rag_tool import RagTool
class SerplyWebpageToMarkdownToolSchema(BaseModel): class SerplyWebpageToMarkdownToolSchema(BaseModel):
"""Input for Serply Search.""" """Input for Serply Search."""
url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
url: str = Field(
...,
description="Mandatory url you want to use to fetch and convert to markdown",
)
class SerplyWebpageToMarkdownTool(RagTool): class SerplyWebpageToMarkdownTool(RagTool):
name: str = "Webpage to Markdown" name: str = "Webpage to Markdown"
description: str = "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand" description: str = (
"A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
)
args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
request_url: str = "https://api.serply.io/v1/request" request_url: str = "https://api.serply.io/v1/request"
proxy_location: Optional[str] = "US" proxy_location: Optional[str] = "US"
headers: Optional[dict] = {} headers: Optional[dict] = {}
def __init__( def __init__(self, proxy_location: Optional[str] = "US", **kwargs):
self,
proxy_location: Optional[str] = "US",
**kwargs
):
""" """
proxy_location: (str): Where to perform the search, for country-specific results. proxy_location: (str): Where to perform the search, for country-specific results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US) ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
self.proxy_location = proxy_location self.proxy_location = proxy_location
self.headers = { self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"], "X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools", "User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location "X-Proxy-Location": proxy_location,
} }
def _run( def _run(
self, self,
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
data = { data = {"url": kwargs["url"], "method": "GET", "response_type": "markdown"}
"url": kwargs["url"], response = requests.request(
"method": "GET", "POST", self.request_url, headers=self.headers, json=data
"response_type": "markdown" )
}
response = requests.request("POST", self.request_url, headers=self.headers, json=data)
return response.text return response.text
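
Since _run reads kwargs["url"] and POSTs it with response_type "markdown", a usage sketch looks like this (top-level export and run() dispatch assumed; key and URL are placeholders):

import os

from crewai_tools import SerplyWebpageToMarkdownTool

os.environ["SERPLY_API_KEY"] = "<your-serply-key>"  # placeholder

to_markdown = SerplyWebpageToMarkdownTool(proxy_location="US")
print(to_markdown.run(url="https://example.com"))  # page returned as markdown text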

View File

@@ -1,21 +1,25 @@
from typing import Optional, Any, Type, Dict, Literal from typing import Any, Dict, Literal, Optional, Type
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field
from crewai_tools.tools.base_tool import BaseTool from crewai_tools.tools.base_tool import BaseTool
class SpiderToolSchema(BaseModel): class SpiderToolSchema(BaseModel):
url: str = Field(description="Website URL") url: str = Field(description="Website URL")
params: Optional[Dict[str, Any]] = Field( params: Optional[Dict[str, Any]] = Field(
description="Set additional params. Options include:\n" description="Set additional params. Options include:\n"
"- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n" "- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
"- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n" "- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
"- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n" "- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
"- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n" "- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
) )
mode: Literal["scrape", "crawl"] = Field( mode: Literal["scrape", "crawl"] = Field(
default="scrape", default="scrape",
description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set." description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set.",
) )
class SpiderTool(BaseTool): class SpiderTool(BaseTool):
name: str = "Spider scrape & crawl tool" name: str = "Spider scrape & crawl tool"
description: str = "Scrape & Crawl any url and return LLM-ready data." description: str = "Scrape & Crawl any url and return LLM-ready data."
@@ -26,11 +30,11 @@ class SpiderTool(BaseTool):
def __init__(self, api_key: Optional[str] = None, **kwargs): def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
try: try:
from spider import Spider # type: ignore from spider import Spider # type: ignore
except ImportError: except ImportError:
raise ImportError( raise ImportError(
"`spider-client` package not found, please run `pip install spider-client`" "`spider-client` package not found, please run `pip install spider-client`"
) )
self.spider = Spider(api_key=api_key) self.spider = Spider(api_key=api_key)
@@ -38,7 +42,7 @@ class SpiderTool(BaseTool):
self, self,
url: str, url: str,
params: Optional[Dict[str, Any]] = None, params: Optional[Dict[str, Any]] = None,
mode: Optional[Literal["scrape", "crawl"]] = "scrape" mode: Optional[Literal["scrape", "crawl"]] = "scrape",
): ):
if mode not in ["scrape", "crawl"]: if mode not in ["scrape", "crawl"]:
raise ValueError( raise ValueError(
@@ -51,9 +55,7 @@ class SpiderTool(BaseTool):
else: else:
params = {"return_format": "markdown"} params = {"return_format": "markdown"}
action = ( action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
)
spider_docs = action(url=url, params=params) spider_docs = action(url=url, params=params)
return spider_docs return spider_docs
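
A sketch of both modes accepted by _run (scrape for a single page, crawl to follow subpages); the API key is a placeholder and `pip install spider-client` is required, as the ImportError above enforces:

from crewai_tools import SpiderTool

spider = SpiderTool(api_key="<your-spider-key>")  # placeholder key

page = spider.run(url="https://example.com", mode="scrape")  # one page
site = spider.run(
    url="https://example.com",
    mode="crawl",
    params={"limit": 5, "metadata": True},  # options from SpiderToolSchema
)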

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type from typing import Any, Optional, Type
from embedchain.models.data_type import DataType from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool from ..rag.rag_tool import RagTool

View File

@@ -2,9 +2,10 @@ import base64
from typing import Type from typing import Type
import requests import requests
from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI from openai import OpenAI
from pydantic.v1 import BaseModel from pydantic import BaseModel
from crewai_tools.tools.base_tool import BaseTool
class ImagePromptSchema(BaseModel): class ImagePromptSchema(BaseModel):

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type from typing import Any, Optional, Type
from embedchain.models.data_type import DataType from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool from ..rag.rag_tool import RagTool
@@ -25,7 +25,9 @@ class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema):
class WebsiteSearchTool(RagTool): class WebsiteSearchTool(RagTool):
name: str = "Search in a specific website" name: str = "Search in a specific website"
description: str = "A tool that can be used to semantic search a query from a specific URL content." description: str = (
"A tool that can be used to semantic search a query from a specific URL content."
)
args_schema: Type[BaseModel] = WebsiteSearchToolSchema args_schema: Type[BaseModel] = WebsiteSearchToolSchema
def __init__(self, website: Optional[str] = None, **kwargs): def __init__(self, website: Optional[str] = None, **kwargs):
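
The hunk ends at the constructor signature; a usage sketch under the usual assumptions (top-level export, a search_query field on the schema, run() dispatching to the RAG query) would be:

from crewai_tools import WebsiteSearchTool

# Pin the tool to one site at construction time, then query it semantically.
docs_search = WebsiteSearchTool(website="https://docs.example.com")  # placeholder URL
print(docs_search.run(search_query="authentication"))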

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type from typing import Any, Optional, Type
from embedchain.models.data_type import DataType from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool from ..rag.rag_tool import RagTool

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type from typing import Any, Optional, Type
from embedchain.models.data_type import DataType from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool from ..rag.rag_tool import RagTool
@@ -25,7 +25,9 @@ class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema):
class YoutubeChannelSearchTool(RagTool): class YoutubeChannelSearchTool(RagTool):
name: str = "Search a Youtube Channels content" name: str = "Search a Youtube Channels content"
description: str = "A tool that can be used to semantic search a query from a Youtube Channels content." description: str = (
"A tool that can be used to semantic search a query from a Youtube Channels content."
)
args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema
def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs): def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs):
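
A matching sketch for the channel variant (handle and query are placeholders; same export and run() assumptions):

from crewai_tools import YoutubeChannelSearchTool

channel_search = YoutubeChannelSearchTool(youtube_channel_handle="@examplechannel")
print(channel_search.run(search_query="vector databases"))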

View File

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type from typing import Any, Optional, Type
from embedchain.models.data_type import DataType from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field from pydantic import BaseModel, Field
from ..rag.rag_tool import RagTool from ..rag.rag_tool import RagTool
@@ -25,7 +25,9 @@ class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema):
class YoutubeVideoSearchTool(RagTool): class YoutubeVideoSearchTool(RagTool):
name: str = "Search a Youtube Video content" name: str = "Search a Youtube Video content"
description: str = "A tool that can be used to semantic search a query from a Youtube Video content." description: str = (
"A tool that can be used to semantic search a query from a Youtube Video content."
)
args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema
def __init__(self, youtube_video_url: Optional[str] = None, **kwargs): def __init__(self, youtube_video_url: Optional[str] = None, **kwargs):
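
And the video variant, under the same assumptions (URL and query are placeholders):

from crewai_tools import YoutubeVideoSearchTool

video_search = YoutubeVideoSearchTool(
    youtube_video_url="https://www.youtube.com/watch?v=<video-id>"
)
print(video_search.run(search_query="fine-tuning"))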