Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-08 23:58:34 +00:00
Merge pull request #101 from crewAIInc/brandon/cre-250-llamaindex-pydantic-v2
Pydantic v1 issues
@@ -3,11 +3,11 @@ from typing import Any, Callable, Optional, Type
from langchain_core.tools import StructuredTool
from pydantic import BaseModel, ConfigDict, Field, validator
-from pydantic.v1 import BaseModel as V1BaseModel
+from pydantic import BaseModel as PydanticBaseModel


class BaseTool(BaseModel, ABC):
-    class _ArgsSchemaPlaceholder(V1BaseModel):
+    class _ArgsSchemaPlaceholder(PydanticBaseModel):
        pass

    model_config = ConfigDict()

@@ -16,7 +16,7 @@ class BaseTool(BaseModel, ABC):
    """The unique name of the tool that clearly communicates its purpose."""
    description: str
    """Used to tell the model how/when/why to use the tool."""
-    args_schema: Type[V1BaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
+    args_schema: Type[PydanticBaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
    """The schema for the arguments that the tool accepts."""
    description_updated: bool = False
    """Flag to check if the description has been updated."""

@@ -26,13 +26,15 @@ class BaseTool(BaseModel, ABC):
    """Flag to check if the tool should be the final agent answer."""

    @validator("args_schema", always=True, pre=True)
-    def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
+    def _default_args_schema(
+        cls, v: Type[PydanticBaseModel]
+    ) -> Type[PydanticBaseModel]:
        if not isinstance(v, cls._ArgsSchemaPlaceholder):
            return v

        return type(
            f"{cls.__name__}Schema",
-            (V1BaseModel,),
+            (PydanticBaseModel,),
            {
                "__annotations__": {
                    k: v for k, v in cls._run.__annotations__.items() if k != "return"

@@ -75,7 +77,7 @@ class BaseTool(BaseModel, ABC):
        class_name = f"{self.__class__.__name__}Schema"
        self.args_schema = type(
            class_name,
-            (V1BaseModel,),
+            (PydanticBaseModel,),
            {
                "__annotations__": {
                    k: v

@@ -127,7 +129,7 @@ def tool(*args):
        class_name = "".join(tool_name.split()).title()
        args_schema = type(
            class_name,
-            (V1BaseModel,),
+            (PydanticBaseModel,),
            {
                "__annotations__": {
                    k: v for k, v in f.__annotations__.items() if k != "return"

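For orientation, a minimal sketch of what a concrete tool looks like against the BaseTool shown above once args_schema is a plain pydantic v2 model; GreetTool and GreetToolSchema are illustrative names, not part of this commit:

from typing import Type

from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class GreetToolSchema(BaseModel):
    # Field descriptions end up in the argument schema the agent reasons over.
    name: str = Field(..., description="Name of the person to greet")


class GreetTool(BaseTool):
    name: str = "Greet a person"
    description: str = "Returns a short greeting for the given name."
    args_schema: Type[BaseModel] = GreetToolSchema

    def _run(self, name: str) -> str:
        # Receives the arguments declared in args_schema.
        return f"Hello, {name}!"

If no args_schema is supplied, the validator above falls back to building one dynamically from _run's type annotations.
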
@@ -1,13 +1,19 @@
-from typing import Optional, Any, Type
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field
+
from crewai_tools.tools.base_tool import BaseTool


class BrowserbaseLoadToolSchema(BaseModel):
    url: str = Field(description="Website URL")


class BrowserbaseLoadTool(BaseTool):
    name: str = "Browserbase web load tool"
    description: str = (
        "Load webpages url in a headless browser using Browserbase and return the contents"
    )
    args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
    api_key: Optional[str] = None
    project_id: Optional[str] = None

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -3,8 +3,9 @@ import os
from typing import List, Optional, Type

import docker
+from pydantic import BaseModel, Field
+
from crewai_tools.tools.base_tool import BaseTool
-from pydantic.v1 import BaseModel, Field


class CodeInterpreterSchema(BaseModel):

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -1,9 +1,10 @@
import json
from typing import Type

-from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
-from pydantic.v1 import BaseModel
+from pydantic import BaseModel
+
+from crewai_tools.tools.base_tool import BaseTool


class ImagePromptSchema(BaseModel):

@@ -1,38 +1,50 @@
import os
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field

from ..base_tool import BaseTool


class FixedDirectoryReadToolSchema(BaseModel):
    """Input for DirectoryReadTool."""

    pass


class DirectoryReadToolSchema(FixedDirectoryReadToolSchema):
    """Input for DirectoryReadTool."""

    directory: str = Field(..., description="Mandatory directory to list content")


class DirectoryReadTool(BaseTool):
    name: str = "List files in directory"
    description: str = (
        "A tool that can be used to recursively list a directory's content."
    )
    args_schema: Type[BaseModel] = DirectoryReadToolSchema
    directory: Optional[str] = None

    def __init__(self, directory: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if directory is not None:
            self.directory = directory
            self.description = f"A tool that can be used to list {directory}'s content."
            self.args_schema = FixedDirectoryReadToolSchema
            self._generate_description()

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        directory = kwargs.get("directory", self.directory)
        if directory[-1] == "/":
            directory = directory[:-1]
        files_list = [
            f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}"
            for root, dirs, files in os.walk(directory)
            for filename in files
        ]
        files = "\n- ".join(files_list)
        return f"File paths: \n-{files}"

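A hedged usage sketch of the two modes the constructor above implies; the paths are illustrative and DirectoryReadTool is assumed to be exported from the package root:

from crewai_tools import DirectoryReadTool  # assumed export

# Directory bound at construction: the agent calls the tool with no arguments.
docs_tool = DirectoryReadTool(directory="./docs")
print(docs_tool.run())

# No directory bound: the caller supplies one per run via the full schema.
generic_tool = DirectoryReadTool()
print(generic_tool.run(directory="./src"))
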
@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.loaders.directory_loader import DirectoryLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -1,26 +1,32 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedDOCXSearchToolSchema(BaseModel):
    """Input for DOCXSearchTool."""

    docx: Optional[str] = Field(
        ..., description="Mandatory docx path you want to search"
    )
    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the DOCX's content",
    )


class DOCXSearchToolSchema(FixedDOCXSearchToolSchema):
    """Input for DOCXSearchTool."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the DOCX's content",
    )


class DOCXSearchTool(RagTool):
    name: str = "Search a DOCX's content"
    description: str = (

@@ -56,9 +62,9 @@ class DOCXSearchTool(RagTool):
        self,
        **kwargs: Any,
    ) -> Any:
        search_query = kwargs.get("search_query")
        if search_query is None:
            search_query = kwargs.get("query")

        docx = kwargs.get("docx")
        if docx is not None:

@@ -1,36 +1,49 @@
import os
from typing import Type
-from pydantic.v1 import BaseModel, Field
+
+from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class EXABaseToolToolSchema(BaseModel):
    """Input for EXABaseTool."""

    search_query: str = Field(
        ..., description="Mandatory search query you want to use to search the internet"
    )


class EXABaseTool(BaseTool):
    name: str = "Search the internet"
    description: str = (
        "A tool that can be used to search the internet from a search_query"
    )
    args_schema: Type[BaseModel] = EXABaseToolToolSchema
    search_url: str = "https://api.exa.ai/search"
    n_results: int = None
    headers: dict = {
        "accept": "application/json",
        "content-type": "application/json",
    }

    def _parse_results(self, results):
        stirng = []
        for result in results:
            try:
                stirng.append(
                    "\n".join(
                        [
                            f"Title: {result['title']}",
                            f"Score: {result['score']}",
                            f"Url: {result['url']}",
                            f"ID: {result['id']}",
                            "---",
                        ]
                    )
                )
            except KeyError:
                next

        content = "\n".join(stirng)
        return f"\nSearch results: {content}\n"

@@ -1,19 +1,20 @@
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field

from ..base_tool import BaseTool


class FixedFileReadToolSchema(BaseModel):
    """Input for FileReadTool."""

    pass


class FileReadToolSchema(FixedFileReadToolSchema):
    """Input for FileReadTool."""

    file_path: str = Field(..., description="Mandatory file full path to read the file")


class FileReadTool(BaseTool):

@@ -22,11 +23,7 @@ class FileReadTool(BaseTool):
    args_schema: Type[BaseModel] = FileReadToolSchema
    file_path: Optional[str] = None

    def __init__(self, file_path: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if file_path is not None:
            self.file_path = file_path

@@ -39,8 +36,8 @@ class FileReadTool(BaseTool):
        **kwargs: Any,
    ) -> Any:
        try:
            file_path = kwargs.get("file_path", self.file_path)
            with open(file_path, "r") as file:
                return file.read()
        except Exception as e:
            return f"Fail to read the file {file_path}. Error: {e}"

@@ -1,39 +1,46 @@
import os
-from typing import Optional, Type, Any
+from typing import Any, Optional, Type

-from pydantic.v1 import BaseModel
+from pydantic import BaseModel
+
from ..base_tool import BaseTool


class FileWriterToolInput(BaseModel):
    filename: str
    content: str
    directory: Optional[str] = None
    overwrite: bool = False


class FileWriterTool(BaseTool):
    name: str = "File Writer Tool"
    description: str = (
        "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
    )
    args_schema: Type[BaseModel] = FileWriterToolInput

    def _run(self, **kwargs: Any) -> str:
        try:
            # Create the directory if it doesn't exist
            if kwargs["directory"] and not os.path.exists(kwargs["directory"]):
                os.makedirs(kwargs["directory"])

            # Construct the full path
            filepath = os.path.join(kwargs["directory"] or "", kwargs["filename"])

            # Check if file exists and overwrite is not allowed
            if os.path.exists(filepath) and not kwargs["overwrite"]:
                return f"File {filepath} already exists and overwrite option was not passed."

            # Write content to the file
            mode = "w" if kwargs["overwrite"] else "x"
            with open(filepath, mode) as file:
                file.write(kwargs["content"])
            return f"Content successfully written to {filepath}"
        except FileExistsError:
            return (
                f"File {filepath} already exists and overwrite option was not passed."
            )
        except Exception as e:
            return f"An error occurred while writing to the file: {str(e)}"

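A small usage sketch for the writer above; note that _run indexes kwargs["directory"] and kwargs["overwrite"] directly, so both keys should be supplied. File and directory names are illustrative, and FileWriterTool is assumed to be exported from the package root:

from crewai_tools import FileWriterTool  # assumed export

writer = FileWriterTool()

# First call creates ./output and writes the file in exclusive ("x") mode.
print(writer.run(filename="notes.txt", content="hello", directory="output", overwrite=False))

# A second call on the same path only succeeds because overwrite=True switches the mode to "w".
print(writer.run(filename="notes.txt", content="hello again", directory="output", overwrite=True))
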
@@ -1,11 +1,19 @@
-from typing import Optional, Any, Type, Dict, List
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, List, Optional, Type
+
+from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class FirecrawlCrawlWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    crawler_options: Optional[Dict[str, Any]] = Field(
        default=None, description="Options for crawling"
    )
    page_options: Optional[Dict[str, Any]] = Field(
        default=None, description="Options for page"
    )


class FirecrawlCrawlWebsiteTool(BaseTool):
    name: str = "Firecrawl web crawl tool"

@@ -17,22 +25,24 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(
        self,
        url: str,
        crawler_options: Optional[Dict[str, Any]] = None,
        page_options: Optional[Dict[str, Any]] = None,
    ):
        if crawler_options is None:
            crawler_options = {}
        if page_options is None:
            page_options = {}

        options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
        return self.firecrawl.crawl_url(url, options)

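A hedged usage sketch for the crawl tool above; the API key and the option keys passed through to Firecrawl's crawlerOptions/pageOptions are assumptions about the Firecrawl API, not something this commit defines:

from crewai_tools import FirecrawlCrawlWebsiteTool  # assumed export

tool = FirecrawlCrawlWebsiteTool(api_key="fc-...")  # illustrative key

result = tool.run(
    url="https://example.com",
    crawler_options={"limit": 5},            # assumed Firecrawl crawler option
    page_options={"onlyMainContent": True},  # assumed Firecrawl page option
)
print(result)
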
@@ -1,12 +1,23 @@
-from typing import Optional, Any, Type, Dict
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, Optional, Type
+
+from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class FirecrawlScrapeWebsiteToolSchema(BaseModel):
    url: str = Field(description="Website URL")
    page_options: Optional[Dict[str, Any]] = Field(
        default=None, description="Options for page scraping"
    )
    extractor_options: Optional[Dict[str, Any]] = Field(
        default=None, description="Options for data extraction"
    )
    timeout: Optional[int] = Field(
        default=None,
        description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
    )


class FirecrawlScrapeWebsiteTool(BaseTool):
    name: str = "Firecrawl web scrape tool"

@@ -18,15 +29,21 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(
        self,
        url: str,
        page_options: Optional[Dict[str, Any]] = None,
        extractor_options: Optional[Dict[str, Any]] = None,
        timeout: Optional[int] = None,
    ):
        if page_options is None:
            page_options = {}
        if extractor_options is None:

@@ -37,6 +54,6 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
        options = {
            "pageOptions": page_options,
            "extractorOptions": extractor_options,
            "timeout": timeout,
        }
        return self.firecrawl.scrape_url(url, options)

@@ -1,11 +1,19 @@
-from typing import Optional, Any, Type, Dict, List
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, List, Optional, Type
+
+from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class FirecrawlSearchToolSchema(BaseModel):
    query: str = Field(description="Search query")
    page_options: Optional[Dict[str, Any]] = Field(
        default=None, description="Options for result formatting"
    )
    search_options: Optional[Dict[str, Any]] = Field(
        default=None, description="Options for searching"
    )


class FirecrawlSearchTool(BaseTool):
    name: str = "Firecrawl web search tool"

@@ -17,22 +25,24 @@ class FirecrawlSearchTool(BaseTool):
    def __init__(self, api_key: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        try:
            from firecrawl import FirecrawlApp  # type: ignore
        except ImportError:
            raise ImportError(
                "`firecrawl` package not found, please run `pip install firecrawl-py`"
            )

        self.firecrawl = FirecrawlApp(api_key=api_key)

    def _run(
        self,
        query: str,
        page_options: Optional[Dict[str, Any]] = None,
        result_options: Optional[Dict[str, Any]] = None,
    ):
        if page_options is None:
            page_options = {}
        if result_options is None:
            result_options = {}

        options = {"pageOptions": page_options, "resultOptions": result_options}
        return self.firecrawl.search(query, options)

@@ -1,7 +1,7 @@
from typing import Any, List, Optional, Type

from embedchain.loaders.github import GithubLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -27,7 +27,9 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema):

class GithubSearchTool(RagTool):
    name: str = "Search a github repo's content"
    description: str = (
        "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
    )
    summarize: bool = False
    gh_token: str
    args_schema: Type[BaseModel] = GithubSearchToolSchema

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -1,50 +1,48 @@
import os
import json
import requests
-from typing import Type, Any, cast, Optional
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type, cast
+
+from pydantic import BaseModel, Field
+
from crewai_tools.tools.base_tool import BaseTool


class LlamaIndexTool(BaseTool):
    """Tool to wrap LlamaIndex tools/query engines."""

    llama_index_tool: Any

    def _run(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Run tool."""
        from llama_index.core.tools import BaseTool as LlamaBaseTool

        tool = cast(LlamaBaseTool, self.llama_index_tool)
        return tool(*args, **kwargs)

    @classmethod
    def from_tool(cls, tool: Any, **kwargs: Any) -> "LlamaIndexTool":
        from llama_index.core.tools import BaseTool as LlamaBaseTool

        if not isinstance(tool, LlamaBaseTool):
            raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
        tool = cast(LlamaBaseTool, tool)

        if tool.metadata.fn_schema is None:
            raise ValueError(
                "The LlamaIndex tool does not have an fn_schema specified."
            )
        args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)

        return cls(
            name=tool.metadata.name,
            description=tool.metadata.description,
            args_schema=args_schema,
            llama_index_tool=tool,
            **kwargs,
        )

    @classmethod
    def from_query_engine(
        cls,

@@ -52,7 +50,7 @@ class LlamaIndexTool(BaseTool):
        name: Optional[str] = None,
        description: Optional[str] = None,
        return_direct: bool = False,
        **kwargs: Any,
    ) -> "LlamaIndexTool":
        from llama_index.core.query_engine import BaseQueryEngine
        from llama_index.core.tools import QueryEngineTool

@@ -60,10 +58,11 @@ class LlamaIndexTool(BaseTool):
        if not isinstance(query_engine, BaseQueryEngine):
            raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")

        # NOTE: by default the schema expects an `input` variable. However this
        # confuses crewAI so we are renaming to `query`.
        class QueryToolSchema(BaseModel):
            """Schema for query tool."""

            query: str = Field(..., description="Search query for the query tool.")

        # NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`

@@ -72,13 +71,9 @@ class LlamaIndexTool(BaseTool):
            name=name,
            description=description,
            return_direct=return_direct,
            resolve_input_errors=True,
        )
        # HACK: we are replacing the schema with our custom schema
        query_engine_tool.metadata.fn_schema = QueryToolSchema

        return cls.from_tool(query_engine_tool, **kwargs)

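A usage sketch of the query-engine path above, assuming LlamaIndexTool is exported from crewai_tools and that a llama-index vector index can be built from a local folder; everything except from_query_engine itself is illustrative:

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

from crewai_tools import LlamaIndexTool  # assumed export

docs = SimpleDirectoryReader("./docs").load_data()
query_engine = VectorStoreIndex.from_documents(docs).as_query_engine()

tool = LlamaIndexTool.from_query_engine(
    query_engine,
    name="Docs QA",
    description="Answers questions about the local docs folder.",
)
print(tool.run(query="What does this project do?"))

The custom QueryToolSchema is what makes query= (rather than llama-index's default input=) the argument crewAI agents pass.
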
@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -1,7 +1,7 @@
from typing import Any, Type

from embedchain.loaders.mysql import MySQLLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -17,7 +17,9 @@ class MySQLSearchToolSchema(BaseModel):

class MySQLSearchTool(RagTool):
    name: str = "Search a database's table content"
    description: str = (
        "A tool that can be used to semantic search a query from a database table's content."
    )
    args_schema: Type[BaseModel] = MySQLSearchToolSchema
    db_uri: str = Field(..., description="Mandatory database URI")

@@ -1,8 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
-from pydantic import model_validator
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator

from ..rag.rag_tool import RagTool

@@ -1,7 +1,7 @@
from typing import Any, Type

from embedchain.loaders.postgres import PostgresLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -17,7 +17,9 @@ class PGSearchToolSchema(BaseModel):

class PGSearchTool(RagTool):
    name: str = "Search a database's table content"
    description: str = (
        "A tool that can be used to semantic search a query from a database table's content."
    )
    args_schema: Type[BaseModel] = PGSearchToolSchema
    db_uri: str = Field(..., description="Mandatory database URI")

@@ -1,57 +1,76 @@
import os
from typing import Any, Optional, Type

import requests
from bs4 import BeautifulSoup
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..base_tool import BaseTool


class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
    """Input for ScrapeElementFromWebsiteTool."""

    pass


class ScrapeElementFromWebsiteToolSchema(FixedScrapeElementFromWebsiteToolSchema):
    """Input for ScrapeElementFromWebsiteTool."""

    website_url: str = Field(..., description="Mandatory website url to read the file")
    css_element: str = Field(
        ...,
        description="Mandatory css reference for element to scrape from the website",
    )


class ScrapeElementFromWebsiteTool(BaseTool):
    name: str = "Read a website content"
    description: str = "A tool that can be used to read a website content."
    args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema
    website_url: Optional[str] = None
    cookies: Optional[dict] = None
    css_element: Optional[str] = None
    headers: Optional[dict] = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://www.google.com/",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Accept-Encoding": "gzip, deflate, br",
    }

    def __init__(
        self,
        website_url: Optional[str] = None,
        cookies: Optional[dict] = None,
        css_element: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        if website_url is not None:
            self.website_url = website_url
            self.css_element = css_element
            self.description = (
                f"A tool that can be used to read {website_url}'s content."
            )
            self.args_schema = FixedScrapeElementFromWebsiteToolSchema
            self._generate_description()
            if cookies is not None:
                self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        website_url = kwargs.get("website_url", self.website_url)
        css_element = kwargs.get("css_element", self.css_element)
        page = requests.get(
            website_url,
            headers=self.headers,
            cookies=self.cookies if self.cookies else {},
        )
        parsed = BeautifulSoup(page.content, "html.parser")
        elements = parsed.select(css_element)
        return "\n".join([element.get_text() for element in elements])

@@ -1,59 +1,73 @@
import os
from typing import Any, Optional, Type

import requests
from bs4 import BeautifulSoup
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

from ..base_tool import BaseTool


class FixedScrapeWebsiteToolSchema(BaseModel):
    """Input for ScrapeWebsiteTool."""

    pass


class ScrapeWebsiteToolSchema(FixedScrapeWebsiteToolSchema):
    """Input for ScrapeWebsiteTool."""

    website_url: str = Field(..., description="Mandatory website url to read the file")


class ScrapeWebsiteTool(BaseTool):
    name: str = "Read website content"
    description: str = "A tool that can be used to read a website content."
    args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema
    website_url: Optional[str] = None
    cookies: Optional[dict] = None
    headers: Optional[dict] = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://www.google.com/",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }

    def __init__(
        self,
        website_url: Optional[str] = None,
        cookies: Optional[dict] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        if website_url is not None:
            self.website_url = website_url
            self.description = (
                f"A tool that can be used to read {website_url}'s content."
            )
            self.args_schema = FixedScrapeWebsiteToolSchema
            self._generate_description()
            if cookies is not None:
                self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        website_url = kwargs.get("website_url", self.website_url)
        page = requests.get(
            website_url,
            timeout=15,
            headers=self.headers,
            cookies=self.cookies if self.cookies else {},
        )

        page.encoding = page.apparent_encoding
        parsed = BeautifulSoup(page.text, "html.parser")

        text = parsed.get_text()
        text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
        text = " ".join([i for i in text.split(" ") if i.strip() != ""])
        return text

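Usage sketch for the scraper above (URL illustrative, export from the package root assumed); binding website_url at construction swaps the schema for the argument-less Fixed variant, so the agent no longer has to pass a URL:

from crewai_tools import ScrapeWebsiteTool  # assumed export

# Bound to one site: run() takes no arguments.
print(ScrapeWebsiteTool(website_url="https://example.com").run())

# Unbound: the URL is supplied per call instead.
print(ScrapeWebsiteTool().run(website_url="https://example.com"))
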
@@ -1,20 +1,31 @@
import logging
-from typing import Optional, Any, Type, Dict, Literal
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, Literal, Optional, Type
+
+from pydantic import BaseModel, Field
+
from crewai_tools.tools.base_tool import BaseTool

logger = logging.getLogger(__file__)


class ScrapflyScrapeWebsiteToolSchema(BaseModel):
    url: str = Field(description="Webpage URL")
    scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(
        default="markdown", description="Webpage extraction format"
    )
    scrape_config: Optional[Dict[str, Any]] = Field(
        default=None, description="Scrapfly request scrape config"
    )
    ignore_scrape_failures: Optional[bool] = Field(
        default=None, description="whether to ignore failures"
    )


class ScrapflyScrapeWebsiteTool(BaseTool):
    name: str = "Scrapfly web scraping API tool"
    description: str = (
        "Scrape a webpage url using Scrapfly and return its content as markdown or text"
    )
    args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema
    api_key: str = None
    scrapfly: Optional[Any] = None

@@ -29,7 +40,13 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
        )
        self.scrapfly = ScrapflyClient(key=api_key)

    def _run(
        self,
        url: str,
        scrape_format: str = "markdown",
        scrape_config: Optional[Dict[str, Any]] = None,
        ignore_scrape_failures: Optional[bool] = None,
    ):
        from scrapfly import ScrapeApiResponse, ScrapeConfig

        scrape_config = scrape_config if scrape_config is not None else {}

@@ -44,4 +61,3 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
            return None
        else:
            raise e

@@ -1,77 +1,94 @@
import time
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type

from bs4 import BeautifulSoup
+from pydantic import BaseModel, Field
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from ..base_tool import BaseTool


class FixedSeleniumScrapingToolSchema(BaseModel):
    """Input for SeleniumScrapingTool."""

    pass


class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema):
    """Input for SeleniumScrapingTool."""

    website_url: str = Field(..., description="Mandatory website url to read the file")
    css_element: str = Field(
        ...,
        description="Mandatory css reference for element to scrape from the website",
    )


class SeleniumScrapingTool(BaseTool):
    name: str = "Read a website content"
    description: str = "A tool that can be used to read a website content."
    args_schema: Type[BaseModel] = SeleniumScrapingToolSchema
    website_url: Optional[str] = None
    driver: Optional[Any] = webdriver.Chrome
    cookie: Optional[dict] = None
    wait_time: Optional[int] = 3
    css_element: Optional[str] = None

    def __init__(
        self,
        website_url: Optional[str] = None,
        cookie: Optional[dict] = None,
        css_element: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        if cookie is not None:
            self.cookie = cookie

        if css_element is not None:
            self.css_element = css_element

        if website_url is not None:
            self.website_url = website_url
            self.description = (
                f"A tool that can be used to read {website_url}'s content."
            )
            self.args_schema = FixedSeleniumScrapingToolSchema

        self._generate_description()

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        website_url = kwargs.get("website_url", self.website_url)
        css_element = kwargs.get("css_element", self.css_element)
        driver = self._create_driver(website_url, self.cookie, self.wait_time)

        content = []
        if css_element is None or css_element.strip() == "":
            body_text = driver.find_element(By.TAG_NAME, "body").text
            content.append(body_text)
        else:
            for element in driver.find_elements(By.CSS_SELECTOR, css_element):
                content.append(element.text)
        driver.close()
        return "\n".join(content)

    def _create_driver(self, url, cookie, wait_time):
        options = Options()
        options.add_argument("--headless")
        driver = self.driver(options=options)
        driver.get(url)
        time.sleep(wait_time)
        if cookie:
            driver.add_cookie(cookie)
            time.sleep(wait_time)
            driver.get(url)
            time.sleep(wait_time)
        return driver

    def close(self):
        self.driver.close()

@@ -1,80 +1,94 @@
import datetime
import json
import os
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type

import requests
+from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


def _save_results_to_file(content: str) -> None:
    """Saves the search results to a file."""
    filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
    with open(filename, "w") as file:
        file.write(content)
    print(f"Results saved to {filename}")


class SerperDevToolSchema(BaseModel):
    """Input for SerperDevTool."""

    search_query: str = Field(
        ..., description="Mandatory search query you want to use to search the internet"
    )


class SerperDevTool(BaseTool):
    name: str = "Search the internet"
    description: str = (
        "A tool that can be used to search the internet with a search_query."
    )
    args_schema: Type[BaseModel] = SerperDevToolSchema
    search_url: str = "https://google.serper.dev/search"
    country: Optional[str] = ""
    location: Optional[str] = ""
    locale: Optional[str] = ""
    n_results: int = 10
    save_file: bool = False

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        search_query = kwargs.get("search_query") or kwargs.get("query")
        save_file = kwargs.get("save_file", self.save_file)
        n_results = kwargs.get("n_results", self.n_results)

        payload = {"q": search_query, "num": n_results}

        if self.country != "":
            payload["gl"] = self.country
        if self.location != "":
            payload["location"] = self.location
        if self.locale != "":
            payload["hl"] = self.locale

        payload = json.dumps(payload)

        headers = {
            "X-API-KEY": os.environ["SERPER_API_KEY"],
            "content-type": "application/json",
        }

        response = requests.request(
            "POST", self.search_url, headers=headers, data=payload
        )
        results = response.json()

        if "organic" in results:
            results = results["organic"][: self.n_results]
            string = []
            for result in results:
                try:
                    string.append(
                        "\n".join(
                            [
                                f"Title: {result['title']}",
                                f"Link: {result['link']}",
                                f"Snippet: {result['snippet']}",
                                "---",
                            ]
                        )
                    )
                except KeyError:
                    continue

            content = "\n".join(string)
            if save_file:
                _save_results_to_file(content)
            return f"\nSearch results: {content}\n"
        else:
            return results

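Usage sketch, assuming the SERPER_API_KEY environment variable that _run reads for its request headers is already set; the query and placeholder key are illustrative:

import os

from crewai_tools import SerperDevTool  # assumed export

os.environ.setdefault("SERPER_API_KEY", "<your-serper-key>")  # illustrative placeholder

search = SerperDevTool(n_results=5, save_file=False)
print(search.run(search_query="pydantic v2 migration guide"))
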
@@ -1,19 +1,27 @@
import os
-from typing import Type, Any, Optional
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
from urllib.parse import urlencode

import requests
+from pydantic import BaseModel, Field

from crewai_tools.tools.rag.rag_tool import RagTool


class SerplyJobSearchToolSchema(BaseModel):
    """Input for Job Search."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to fetch jobs postings.",
    )


class SerplyJobSearchTool(RagTool):
    name: str = "Job Search"
    description: str = (
        "A tool to perform to perform a job search in the US with a search_query."
    )
    args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
    request_url: str = "https://api.serply.io/v1/job/search/"
    proxy_location: Optional[str] = "US"

@@ -23,20 +31,17 @@ class SerplyJobSearchTool(RagTool):
    """
    headers: Optional[dict] = {}

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.headers = {
            "X-API-KEY": os.environ["SERPLY_API_KEY"],
            "User-Agent": "crew-tools",
            "X-Proxy-Location": self.proxy_location,
        }

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        query_payload = {}

@@ -58,18 +63,22 @@ class SerplyJobSearchTool(RagTool):
        string = []
        for job in jobs:
            try:
                string.append(
                    "\n".join(
                        [
                            f"Position: {job['position']}",
                            f"Employer: {job['employer']}",
                            f"Location: {job['location']}",
                            f"Link: {job['link']}",
                            f"""Highest: {', '.join([h for h in job['highlights']])}""",
                            f"Is Remote: {job['is_remote']}",
                            f"Is Hybrid: {job['is_remote']}",
                            "---",
                        ]
                    )
                )
            except KeyError:
                continue

        content = "\n".join(string)
        return f"\nSearch results: {content}\n"

@@ -1,14 +1,19 @@
import os
import requests
from typing import Any, Optional, Type
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field

import requests
from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class SerplyNewsSearchToolSchema(BaseModel):
"""Input for Serply News Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")

search_query: str = Field(
..., description="Mandatory search query you want to use to fetch news articles"
)


class SerplyNewsSearchTool(BaseTool):
@@ -21,15 +26,12 @@ class SerplyNewsSearchTool(BaseTool):
limit: Optional[int] = 10

def __init__(
self,
limit: Optional[int] = 10,
proxy_location: Optional[str] = "US",
**kwargs
self, limit: Optional[int] = 10, proxy_location: Optional[str] = "US", **kwargs
):
"""
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
proxy_location: (str): Where to get news, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
proxy_location: (str): Where to get news, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.limit = limit
@@ -37,12 +39,12 @@ class SerplyNewsSearchTool(BaseTool):
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}

def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
# build query parameters
query_payload = {}
@@ -58,24 +60,28 @@ class SerplyNewsSearchTool(BaseTool):
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "entries" in results:
results = results['entries']
results = results["entries"]
string = []
for result in results[:self.limit]:
for result in results[: self.limit]:
try:
# follow url
r = requests.get(result['link'])
final_link = r.history[-1].headers['Location']
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {final_link}",
f"Source: {result['source']['title']}",
f"Published: {result['published']}",
"---"
]))
r = requests.get(result["link"])
final_link = r.history[-1].headers["Location"]
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {final_link}",
f"Source: {result['source']['title']}",
f"Published: {result['published']}",
"---",
]
)
)
except KeyError:
continue

content = '\n'.join(string)
content = "\n".join(string)
return f"\nSearch results: {content}\n"
else:
return results

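Beyond the import swap, the news tool's constructor is only collapsed onto one line, so it is still configured the same way. A hedged sketch (the top-level import path and the API key value are assumptions, not taken from this diff):

import os

from crewai_tools import SerplyNewsSearchTool  # import path assumed

# The tool reads the API key from the environment, as the hunk above shows.
os.environ.setdefault("SERPLY_API_KEY", "<your-serply-key>")

# Same keyword arguments as before the reformat: limit and proxy_location.
news_tool = SerplyNewsSearchTool(limit=10, proxy_location="US")

In normal use the instance is passed to an agent in its tools list rather than invoked directly.
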
@@ -1,36 +1,39 @@
import os
import requests
from typing import Any, Optional, Type
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field

import requests
from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class SerplyScholarSearchToolSchema(BaseModel):
"""Input for Serply Scholar Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")

search_query: str = Field(
...,
description="Mandatory search query you want to use to fetch scholarly literature",
)


class SerplyScholarSearchTool(BaseTool):
name: str = "Scholar Search"
description: str = "A tool to perform scholarly literature search with a search_query."
description: str = (
"A tool to perform scholarly literature search with a search_query."
)
args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
search_url: str = "https://api.serply.io/v1/scholar/"
hl: Optional[str] = "us"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}

def __init__(
self,
hl: str = "us",
proxy_location: Optional[str] = "US",
**kwargs
):
def __init__(self, hl: str = "us", proxy_location: Optional[str] = "US", **kwargs):
"""
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
proxy_location: (str): Specify the proxy location for the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
proxy_location: (str): Specify the proxy location for the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.hl = hl
@@ -38,16 +41,14 @@ class SerplyScholarSearchTool(BaseTool):
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}

def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
query_payload = {
"hl": self.hl
}
query_payload = {"hl": self.hl}

if "query" in kwargs:
query_payload["q"] = kwargs["query"]
@@ -67,20 +68,24 @@ class SerplyScholarSearchTool(BaseTool):
for article in articles:
try:
if "doc" in article:
link = article['doc']['link']
link = article["doc"]["link"]
else:
link = article['link']
authors = [author['name'] for author in article['author']['authors']]
string.append('\n'.join([
f"Title: {article['title']}",
f"Link: {link}",
f"Description: {article['description']}",
f"Cite: {article['cite']}",
f"Authors: {', '.join(authors)}",
"---"
]))
link = article["link"]
authors = [author["name"] for author in article["author"]["authors"]]
string.append(
"\n".join(
[
f"Title: {article['title']}",
f"Link: {link}",
f"Description: {article['description']}",
f"Cite: {article['cite']}",
f"Authors: {', '.join(authors)}",
"---",
]
)
)
except KeyError:
continue

content = '\n'.join(string)
content = "\n".join(string)
return f"\nSearch results: {content}\n"

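For the scholar tool, the hunk shows `_run()` reading a `query` keyword into the payload, so a direct call can be sketched as follows (the import path and API key are assumed):

import os

from crewai_tools import SerplyScholarSearchTool  # import path assumed

os.environ.setdefault("SERPLY_API_KEY", "<your-serply-key>")

scholar_tool = SerplyScholarSearchTool(hl="us", proxy_location="US")

# _run() copies kwargs["query"] into the "q" field of query_payload above.
print(scholar_tool._run(query="multi-agent LLM frameworks"))
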
@@ -1,14 +1,19 @@
import os
import requests
from typing import Any, Optional, Type
from urllib.parse import urlencode
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field

import requests
from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class SerplyWebSearchToolSchema(BaseModel):
"""Input for Serply Web Search."""
search_query: str = Field(..., description="Mandatory search query you want to use to Google search")

search_query: str = Field(
..., description="Mandatory search query you want to use to Google search"
)


class SerplyWebSearchTool(BaseTool):
@@ -24,21 +29,21 @@ class SerplyWebSearchTool(BaseTool):
headers: Optional[dict] = {}

def __init__(
self,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
**kwargs
self,
hl: str = "us",
limit: int = 10,
device_type: str = "desktop",
proxy_location: str = "US",
**kwargs,
):
"""
param: query (str): The query to search for
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
param: query (str): The query to search for
param: hl (str): host Language code to display results in
(reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
param: limit (int): The maximum number of results to return [10-100, defaults to 10]
param: device_type (str): desktop/mobile results (defaults to desktop)
proxy_location: (str): Where to perform the search, specifically for local/regional results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)

@@ -50,18 +55,18 @@ class SerplyWebSearchTool(BaseTool):
self.query_payload = {
"num": limit,
"gl": proxy_location.upper(),
"hl": hl.lower()
"hl": hl.lower(),
}
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"X-User-Agent": device_type,
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}

def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
if "query" in kwargs:
self.query_payload["q"] = kwargs["query"]
@@ -74,20 +79,24 @@ class SerplyWebSearchTool(BaseTool):
response = requests.request("GET", url, headers=self.headers)
results = response.json()
if "results" in results:
results = results['results']
results = results["results"]
string = []
for result in results:
try:
string.append('\n'.join([
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Description: {result['description'].strip()}",
"---"
]))
string.append(
"\n".join(
[
f"Title: {result['title']}",
f"Link: {result['link']}",
f"Description: {result['description'].strip()}",
"---",
]
)
)
except KeyError:
continue

content = '\n'.join(string)
content = "\n".join(string)
return f"\nSearch results: {content}\n"
else:
return results

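The web search tool works the same way; its `_run()` also checks for a `query` keyword before building the request. A hedged usage sketch under the same assumptions (import path and API key are not taken from this diff):

import os

from crewai_tools import SerplyWebSearchTool  # import path assumed

os.environ.setdefault("SERPLY_API_KEY", "<your-serply-key>")

# Constructor arguments shown in the hunk: hl, limit, device_type, proxy_location.
web_tool = SerplyWebSearchTool(
    hl="us",
    limit=10,
    device_type="desktop",
    proxy_location="US",
)

print(web_tool._run(query="crewAI tools pydantic v2"))
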
@@ -1,48 +1,50 @@
import os
from typing import Any, Optional, Type

import requests
from typing import Type, Any, Optional
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field

from crewai_tools.tools.rag.rag_tool import RagTool


class SerplyWebpageToMarkdownToolSchema(BaseModel):
"""Input for Serply Search."""
url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")

url: str = Field(
...,
description="Mandatory url you want to use to fetch and convert to markdown",
)


class SerplyWebpageToMarkdownTool(RagTool):
name: str = "Webpage to Markdown"
description: str = "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
description: str = (
"A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
)
args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
request_url: str = "https://api.serply.io/v1/request"
proxy_location: Optional[str] = "US"
headers: Optional[dict] = {}

def __init__(
self,
proxy_location: Optional[str] = "US",
**kwargs
):
def __init__(self, proxy_location: Optional[str] = "US", **kwargs):
"""
proxy_location: (str): Where to perform the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
proxy_location: (str): Where to perform the search, specifically for a specific country results.
['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
"""
super().__init__(**kwargs)
self.proxy_location = proxy_location
self.headers = {
"X-API-KEY": os.environ["SERPLY_API_KEY"],
"User-Agent": "crew-tools",
"X-Proxy-Location": proxy_location
"X-Proxy-Location": proxy_location,
}

def _run(
self,
**kwargs: Any,
self,
**kwargs: Any,
) -> Any:
data = {
"url": kwargs["url"],
"method": "GET",
"response_type": "markdown"
}
response = requests.request("POST", self.request_url, headers=self.headers, json=data)
data = {"url": kwargs["url"], "method": "GET", "response_type": "markdown"}
response = requests.request(
"POST", self.request_url, headers=self.headers, json=data
)
return response.text

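The markdown conversion tool only needs a `url` keyword, as the condensed payload in this hunk shows. A short sketch, again assuming the top-level import and an API key in the environment:

import os

from crewai_tools import SerplyWebpageToMarkdownTool  # import path assumed

os.environ.setdefault("SERPLY_API_KEY", "<your-serply-key>")

markdown_tool = SerplyWebpageToMarkdownTool(proxy_location="US")

# Posts {"url": ..., "method": "GET", "response_type": "markdown"} and returns response.text.
page_markdown = markdown_tool._run(url="https://example.com")
print(page_markdown[:500])
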
@@ -1,21 +1,25 @@
from typing import Optional, Any, Type, Dict, Literal
from pydantic.v1 import BaseModel, Field
from typing import Any, Dict, Literal, Optional, Type

from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class SpiderToolSchema(BaseModel):
url: str = Field(description="Website URL")
params: Optional[Dict[str, Any]] = Field(
description="Set additional params. Options include:\n"
"- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
"- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
"- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
"- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
"- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
"- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
"- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
"- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
)
mode: Literal["scrape", "crawl"] = Field(
default="scrape",
description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set."
description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set.",
)


class SpiderTool(BaseTool):
name: str = "Spider scrape & crawl tool"
description: str = "Scrape & Crawl any url and return LLM-ready data."
@@ -26,11 +30,11 @@ class SpiderTool(BaseTool):
def __init__(self, api_key: Optional[str] = None, **kwargs):
super().__init__(**kwargs)
try:
from spider import Spider  # type: ignore
from spider import Spider  # type: ignore
except ImportError:
raise ImportError(
"`spider-client` package not found, please run `pip install spider-client`"
)
raise ImportError(
"`spider-client` package not found, please run `pip install spider-client`"
)

self.spider = Spider(api_key=api_key)

@@ -38,7 +42,7 @@ class SpiderTool(BaseTool):
self,
url: str,
params: Optional[Dict[str, Any]] = None,
mode: Optional[Literal["scrape", "crawl"]] = "scrape"
mode: Optional[Literal["scrape", "crawl"]] = "scrape",
):
if mode not in ["scrape", "crawl"]:
raise ValueError(
@@ -51,9 +55,7 @@ class SpiderTool(BaseTool):
else:
params = {"return_format": "markdown"}

action = (
self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
)
action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
spider_docs = action(url=url, params=params)

return spider_docs

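SpiderTool's `_run()` signature is fully visible above (`url`, optional `params`, and `mode`), so a sketch is straightforward; it assumes the `spider-client` package is installed and that a valid Spider API key is supplied (key value and import path are assumptions):

from crewai_tools import SpiderTool  # import path assumed; requires `pip install spider-client`

spider_tool = SpiderTool(api_key="<your-spider-api-key>")

# mode must be "scrape" or "crawl"; params defaults to {"return_format": "markdown"}.
docs = spider_tool._run(url="https://example.com", mode="scrape")
print(docs)
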
@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool


@@ -2,9 +2,10 @@ import base64
from typing import Type

import requests
from crewai_tools.tools.base_tool import BaseTool
from openai import OpenAI
from pydantic.v1 import BaseModel
from pydantic import BaseModel

from crewai_tools.tools.base_tool import BaseTool


class ImagePromptSchema(BaseModel):

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -25,7 +25,9 @@ class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema):

class WebsiteSearchTool(RagTool):
name: str = "Search in a specific website"
description: str = "A tool that can be used to semantic search a query from a specific URL content."
description: str = (
"A tool that can be used to semantic search a query from a specific URL content."
)
args_schema: Type[BaseModel] = WebsiteSearchToolSchema

def __init__(self, website: Optional[str] = None, **kwargs):

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool


@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -25,7 +25,9 @@ class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema):

class YoutubeChannelSearchTool(RagTool):
name: str = "Search a Youtube Channels content"
description: str = "A tool that can be used to semantic search a query from a Youtube Channels content."
description: str = (
"A tool that can be used to semantic search a query from a Youtube Channels content."
)
args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema

def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs):

@@ -1,7 +1,7 @@
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field
from pydantic import BaseModel, Field

from ..rag.rag_tool import RagTool

@@ -25,7 +25,9 @@ class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema):

class YoutubeVideoSearchTool(RagTool):
name: str = "Search a Youtube Video content"
description: str = "A tool that can be used to semantic search a query from a Youtube Video content."
description: str = (
"A tool that can be used to semantic search a query from a Youtube Video content."
)
args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema

def __init__(self, youtube_video_url: Optional[str] = None, **kwargs):

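The remaining hunks only swap the `pydantic.v1` imports and re-wrap long description strings for the embedchain-backed RAG tools; their constructors are unchanged. A hedged sketch of the website search variant (import path assumed):

from crewai_tools import WebsiteSearchTool  # import path assumed

# Passing website up front fixes the tool to that URL's content; leaving it
# None lets the agent supply the website argument at run time.
site_search = WebsiteSearchTool(website="https://docs.crewai.com")
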