Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-10 00:28:31 +00:00)

Merge pull request #101 from crewAIInc/brandon/cre-250-llamaindex-pydantic-v2

Pydantic v1 issues
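The change repeated across every tool module below is the same: drop the `pydantic.v1` compatibility imports, build the schemas directly on Pydantic v2, and let the formatter reflow the long literals. A minimal sketch of the pattern, assuming nothing beyond what the hunks show (the schema name and field below are hypothetical, not taken from the diff):

# Before: tool schemas were declared against the v1 shim bundled with Pydantic 2.x
# from pydantic.v1 import BaseModel, Field

# After: tool schemas use Pydantic v2 directly
from pydantic import BaseModel, Field


class ExampleToolSchema(BaseModel):
    """Input for a hypothetical tool."""

    query: str = Field(..., description="Mandatory search query")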
@@ -3,11 +3,11 @@ from typing import Any, Callable, Optional, Type
 from langchain_core.tools import StructuredTool
 from pydantic import BaseModel, ConfigDict, Field, validator
-from pydantic.v1 import BaseModel as V1BaseModel
+from pydantic import BaseModel as PydanticBaseModel


 class BaseTool(BaseModel, ABC):
-    class _ArgsSchemaPlaceholder(V1BaseModel):
+    class _ArgsSchemaPlaceholder(PydanticBaseModel):
         pass

     model_config = ConfigDict()
@@ -16,7 +16,7 @@ class BaseTool(BaseModel, ABC):
     """The unique name of the tool that clearly communicates its purpose."""
     description: str
     """Used to tell the model how/when/why to use the tool."""
-    args_schema: Type[V1BaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
+    args_schema: Type[PydanticBaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
     """The schema for the arguments that the tool accepts."""
     description_updated: bool = False
     """Flag to check if the description has been updated."""
@@ -26,13 +26,15 @@ class BaseTool(BaseModel, ABC):
     """Flag to check if the tool should be the final agent answer."""

     @validator("args_schema", always=True, pre=True)
-    def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
+    def _default_args_schema(
+        cls, v: Type[PydanticBaseModel]
+    ) -> Type[PydanticBaseModel]:
         if not isinstance(v, cls._ArgsSchemaPlaceholder):
             return v

         return type(
             f"{cls.__name__}Schema",
-            (V1BaseModel,),
+            (PydanticBaseModel,),
             {
                 "__annotations__": {
                     k: v for k, v in cls._run.__annotations__.items() if k != "return"
@@ -75,7 +77,7 @@ class BaseTool(BaseModel, ABC):
         class_name = f"{self.__class__.__name__}Schema"
         self.args_schema = type(
             class_name,
-            (V1BaseModel,),
+            (PydanticBaseModel,),
             {
                 "__annotations__": {
                     k: v
@@ -127,7 +129,7 @@ def tool(*args):
         class_name = "".join(tool_name.split()).title()
         args_schema = type(
             class_name,
-            (V1BaseModel,),
+            (PydanticBaseModel,),
             {
                 "__annotations__": {
                     k: v for k, v in f.__annotations__.items() if k != "return"
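With `BaseTool.args_schema` now typed as a plain Pydantic v2 model, a subclass simply supplies an ordinary `BaseModel` for its arguments. A hedged sketch under that assumption (the greeting tool and its field are made up for illustration, not part of this commit):

from typing import Any, Type

from pydantic import BaseModel, Field

from crewai_tools.tools.base_tool import BaseTool


class GreetToolSchema(BaseModel):
    name: str = Field(..., description="Name of the person to greet")


class GreetTool(BaseTool):
    name: str = "Greet someone"
    description: str = "Returns a short greeting for the given name."
    # Supplied explicitly; otherwise BaseTool derives a schema from _run's annotations.
    args_schema: Type[BaseModel] = GreetToolSchema

    def _run(self, name: str, **kwargs: Any) -> str:
        return f"Hello, {name}!"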
@@ -1,13 +1,19 @@
-from typing import Optional, Any, Type
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field

 from crewai_tools.tools.base_tool import BaseTool


 class BrowserbaseLoadToolSchema(BaseModel):
     url: str = Field(description="Website URL")


 class BrowserbaseLoadTool(BaseTool):
     name: str = "Browserbase web load tool"
-    description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
+    description: str = (
+        "Load webpages url in a headless browser using Browserbase and return the contents"
+    )
     args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
     api_key: Optional[str] = None
     project_id: Optional[str] = None
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -3,8 +3,9 @@ import os
 from typing import List, Optional, Type

 import docker
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool
-from pydantic.v1 import BaseModel, Field


 class CodeInterpreterSchema(BaseModel):
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -1,9 +1,10 @@
 import json
 from typing import Type

-from crewai_tools.tools.base_tool import BaseTool
 from openai import OpenAI
-from pydantic.v1 import BaseModel
+from pydantic import BaseModel
+
+from crewai_tools.tools.base_tool import BaseTool


 class ImagePromptSchema(BaseModel):
@@ -1,38 +1,50 @@
 import os
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field

 from ..base_tool import BaseTool


 class FixedDirectoryReadToolSchema(BaseModel):
     """Input for DirectoryReadTool."""
-    pass
+
+    pass


 class DirectoryReadToolSchema(FixedDirectoryReadToolSchema):
     """Input for DirectoryReadTool."""
-    directory: str = Field(..., description="Mandatory directory to list content")
+
+    directory: str = Field(..., description="Mandatory directory to list content")


 class DirectoryReadTool(BaseTool):
     name: str = "List files in directory"
-    description: str = "A tool that can be used to recursively list a directory's content."
-    args_schema: Type[BaseModel] = DirectoryReadToolSchema
-    directory: Optional[str] = None
+    description: str = (
+        "A tool that can be used to recursively list a directory's content."
+    )
+    args_schema: Type[BaseModel] = DirectoryReadToolSchema
+    directory: Optional[str] = None

     def __init__(self, directory: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         if directory is not None:
             self.directory = directory
             self.description = f"A tool that can be used to list {directory}'s content."
             self.args_schema = FixedDirectoryReadToolSchema
             self._generate_description()

-    def _run(
-        self,
-        **kwargs: Any,
-    ) -> Any:
-        directory = kwargs.get('directory', self.directory)
-        if directory[-1] == "/":
-            directory = directory[:-1]
-        files_list = [f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}" for root, dirs, files in os.walk(directory) for filename in files]
-        files = "\n- ".join(files_list)
-        return f"File paths: \n-{files}"
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        directory = kwargs.get("directory", self.directory)
+        if directory[-1] == "/":
+            directory = directory[:-1]
+        files_list = [
+            f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}"
+            for root, dirs, files in os.walk(directory)
+            for filename in files
+        ]
+        files = "\n- ".join(files_list)
+        return f"File paths: \n-{files}"
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.loaders.directory_loader import DirectoryLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -1,26 +1,32 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool


 class FixedDOCXSearchToolSchema(BaseModel):
     """Input for DOCXSearchTool."""
-    docx: Optional[str] = Field(..., description="Mandatory docx path you want to search")
+
+    docx: Optional[str] = Field(
+        ..., description="Mandatory docx path you want to search"
+    )
     search_query: str = Field(
         ...,
         description="Mandatory search query you want to use to search the DOCX's content",
     )


 class DOCXSearchToolSchema(FixedDOCXSearchToolSchema):
     """Input for DOCXSearchTool."""

     search_query: str = Field(
         ...,
         description="Mandatory search query you want to use to search the DOCX's content",
     )


 class DOCXSearchTool(RagTool):
     name: str = "Search a DOCX's content"
     description: str = (
@@ -56,9 +62,9 @@ class DOCXSearchTool(RagTool):
         self,
         **kwargs: Any,
     ) -> Any:
-        search_query = kwargs.get('search_query')
+        search_query = kwargs.get("search_query")
         if search_query is None:
-            search_query = kwargs.get('query')
+            search_query = kwargs.get("query")

         docx = kwargs.get("docx")
         if docx is not None:
@@ -1,36 +1,49 @@
 import os
 from typing import Type
-from pydantic.v1 import BaseModel, Field
+
+from pydantic import BaseModel, Field

 from crewai_tools.tools.base_tool import BaseTool


 class EXABaseToolToolSchema(BaseModel):
     """Input for EXABaseTool."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
+
+    search_query: str = Field(
+        ..., description="Mandatory search query you want to use to search the internet"
+    )


 class EXABaseTool(BaseTool):
     name: str = "Search the internet"
-    description: str = "A tool that can be used to search the internet from a search_query"
-    args_schema: Type[BaseModel] = EXABaseToolToolSchema
-    search_url: str = "https://api.exa.ai/search"
-    n_results: int = None
-    headers: dict = {
-        "accept": "application/json",
-        "content-type": "application/json",
-    }
+    description: str = (
+        "A tool that can be used to search the internet from a search_query"
+    )
+    args_schema: Type[BaseModel] = EXABaseToolToolSchema
+    search_url: str = "https://api.exa.ai/search"
+    n_results: int = None
+    headers: dict = {
+        "accept": "application/json",
+        "content-type": "application/json",
+    }

     def _parse_results(self, results):
         stirng = []
         for result in results:
             try:
-                stirng.append('\n'.join([
-                    f"Title: {result['title']}",
-                    f"Score: {result['score']}",
-                    f"Url: {result['url']}",
-                    f"ID: {result['id']}",
-                    "---"
-                ]))
-            except KeyError:
-                next
+                stirng.append(
+                    "\n".join(
+                        [
+                            f"Title: {result['title']}",
+                            f"Score: {result['score']}",
+                            f"Url: {result['url']}",
+                            f"ID: {result['id']}",
+                            "---",
+                        ]
+                    )
+                )
+            except KeyError:
+                next

-        content = '\n'.join(stirng)
+        content = "\n".join(stirng)
         return f"\nSearch results: {content}\n"
@@ -1,19 +1,20 @@
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
+
+from pydantic import BaseModel, Field

 from ..base_tool import BaseTool


 class FixedFileReadToolSchema(BaseModel):
     """Input for FileReadTool."""

     pass


 class FileReadToolSchema(FixedFileReadToolSchema):
     """Input for FileReadTool."""
-    file_path: str = Field(
-        ...,
-        description="Mandatory file full path to read the file"
-    )
+
+    file_path: str = Field(..., description="Mandatory file full path to read the file")


 class FileReadTool(BaseTool):
@@ -22,11 +23,7 @@ class FileReadTool(BaseTool):
     args_schema: Type[BaseModel] = FileReadToolSchema
     file_path: Optional[str] = None

-    def __init__(
-        self,
-        file_path: Optional[str] = None,
-        **kwargs
-    ):
+    def __init__(self, file_path: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         if file_path is not None:
             self.file_path = file_path
@@ -39,8 +36,8 @@ class FileReadTool(BaseTool):
         **kwargs: Any,
     ) -> Any:
         try:
-            file_path = kwargs.get('file_path', self.file_path)
-            with open(file_path, 'r') as file:
+            file_path = kwargs.get("file_path", self.file_path)
+            with open(file_path, "r") as file:
                 return file.read()
         except Exception as e:
             return f"Fail to read the file {file_path}. Error: {e}"
@@ -1,39 +1,46 @@
 import os
-from typing import Optional, Type, Any
+from typing import Any, Optional, Type

-from pydantic.v1 import BaseModel
+from pydantic import BaseModel
+
 from ..base_tool import BaseTool


 class FileWriterToolInput(BaseModel):
     filename: str
     content: str
     directory: Optional[str] = None
     overwrite: bool = False


 class FileWriterTool(BaseTool):
     name: str = "File Writer Tool"
-    description: str = "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
+    description: str = (
+        "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input."
+    )
     args_schema: Type[BaseModel] = FileWriterToolInput

     def _run(self, **kwargs: Any) -> str:
         try:
             # Create the directory if it doesn't exist
-            if kwargs['directory'] and not os.path.exists(kwargs['directory']):
-                os.makedirs(kwargs['directory'])
+            if kwargs["directory"] and not os.path.exists(kwargs["directory"]):
+                os.makedirs(kwargs["directory"])

             # Construct the full path
-            filepath = os.path.join(kwargs['directory'] or '', kwargs['filename'])
+            filepath = os.path.join(kwargs["directory"] or "", kwargs["filename"])

             # Check if file exists and overwrite is not allowed
-            if os.path.exists(filepath) and not kwargs['overwrite']:
+            if os.path.exists(filepath) and not kwargs["overwrite"]:
                 return f"File {filepath} already exists and overwrite option was not passed."

             # Write content to the file
-            mode = 'w' if kwargs['overwrite'] else 'x'
+            mode = "w" if kwargs["overwrite"] else "x"
             with open(filepath, mode) as file:
-                file.write(kwargs['content'])
+                file.write(kwargs["content"])
             return f"Content successfully written to {filepath}"
         except FileExistsError:
-            return f"File {filepath} already exists and overwrite option was not passed."
+            return (
+                f"File {filepath} already exists and overwrite option was not passed."
+            )
         except Exception as e:
             return f"An error occurred while writing to the file: {str(e)}"
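For reference, a hedged usage sketch of the writer tool above, calling `_run` directly with the four fields declared on `FileWriterToolInput`; the import path assumes the tool is re-exported at the package root, and the file names are placeholders:

from crewai_tools import FileWriterTool  # assumed top-level export

writer = FileWriterTool()
result = writer._run(
    filename="notes.txt",   # required
    content="hello world",  # required
    directory="output",     # created if it does not exist
    overwrite=True,         # "w" mode when True, exclusive "x" otherwise
)
print(result)  # e.g. "Content successfully written to output/notes.txt"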
@@ -1,11 +1,19 @@
-from typing import Optional, Any, Type, Dict, List
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, List, Optional, Type
+
+from pydantic import BaseModel, Field

 from crewai_tools.tools.base_tool import BaseTool


 class FirecrawlCrawlWebsiteToolSchema(BaseModel):
     url: str = Field(description="Website URL")
-    crawler_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for crawling")
-    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page")
+    crawler_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for crawling"
+    )
+    page_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for page"
+    )


 class FirecrawlCrawlWebsiteTool(BaseTool):
     name: str = "Firecrawl web crawl tool"
@@ -17,22 +25,24 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         try:
             from firecrawl import FirecrawlApp  # type: ignore
         except ImportError:
             raise ImportError(
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )

         self.firecrawl = FirecrawlApp(api_key=api_key)

-    def _run(self, url: str, crawler_options: Optional[Dict[str, Any]] = None, page_options: Optional[Dict[str, Any]] = None):
-        if (crawler_options is None):
+    def _run(
+        self,
+        url: str,
+        crawler_options: Optional[Dict[str, Any]] = None,
+        page_options: Optional[Dict[str, Any]] = None,
+    ):
+        if crawler_options is None:
             crawler_options = {}
-        if (page_options is None):
+        if page_options is None:
             page_options = {}

-        options = {
-            "crawlerOptions": crawler_options,
-            "pageOptions": page_options
-        }
-        return self.firecrawl.crawl_url(url, options)
+        options = {"crawlerOptions": crawler_options, "pageOptions": page_options}
+        return self.firecrawl.crawl_url(url, options)
@@ -1,12 +1,23 @@
-from typing import Optional, Any, Type, Dict
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, Optional, Type
+
+from pydantic import BaseModel, Field

 from crewai_tools.tools.base_tool import BaseTool


 class FirecrawlScrapeWebsiteToolSchema(BaseModel):
     url: str = Field(description="Website URL")
-    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for page scraping")
-    extractor_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for data extraction")
-    timeout: Optional[int] = Field(default=None, description="Timeout in milliseconds for the scraping operation. The default value is 30000.")
+    page_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for page scraping"
+    )
+    extractor_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for data extraction"
+    )
+    timeout: Optional[int] = Field(
+        default=None,
+        description="Timeout in milliseconds for the scraping operation. The default value is 30000.",
+    )


 class FirecrawlScrapeWebsiteTool(BaseTool):
     name: str = "Firecrawl web scrape tool"
@@ -18,15 +29,21 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         try:
             from firecrawl import FirecrawlApp  # type: ignore
         except ImportError:
             raise ImportError(
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )

         self.firecrawl = FirecrawlApp(api_key=api_key)

-    def _run(self, url: str, page_options: Optional[Dict[str, Any]] = None, extractor_options: Optional[Dict[str, Any]] = None, timeout: Optional[int] = None):
+    def _run(
+        self,
+        url: str,
+        page_options: Optional[Dict[str, Any]] = None,
+        extractor_options: Optional[Dict[str, Any]] = None,
+        timeout: Optional[int] = None,
+    ):
         if page_options is None:
             page_options = {}
         if extractor_options is None:
@@ -37,6 +54,6 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
         options = {
             "pageOptions": page_options,
             "extractorOptions": extractor_options,
-            "timeout": timeout
+            "timeout": timeout,
         }
         return self.firecrawl.scrape_url(url, options)
@@ -1,11 +1,19 @@
-from typing import Optional, Any, Type, Dict, List
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, List, Optional, Type
+
+from pydantic import BaseModel, Field

 from crewai_tools.tools.base_tool import BaseTool


 class FirecrawlSearchToolSchema(BaseModel):
     query: str = Field(description="Search query")
-    page_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for result formatting")
-    search_options: Optional[Dict[str, Any]] = Field(default=None, description="Options for searching")
+    page_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for result formatting"
+    )
+    search_options: Optional[Dict[str, Any]] = Field(
+        default=None, description="Options for searching"
+    )


 class FirecrawlSearchTool(BaseTool):
     name: str = "Firecrawl web search tool"
@@ -17,22 +25,24 @@ class FirecrawlSearchTool(BaseTool):
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         try:
             from firecrawl import FirecrawlApp  # type: ignore
         except ImportError:
             raise ImportError(
                 "`firecrawl` package not found, please run `pip install firecrawl-py`"
             )

         self.firecrawl = FirecrawlApp(api_key=api_key)

-    def _run(self, query: str, page_options: Optional[Dict[str, Any]] = None, result_options: Optional[Dict[str, Any]] = None):
-        if (page_options is None):
+    def _run(
+        self,
+        query: str,
+        page_options: Optional[Dict[str, Any]] = None,
+        result_options: Optional[Dict[str, Any]] = None,
+    ):
+        if page_options is None:
             page_options = {}
-        if (result_options is None):
+        if result_options is None:
             result_options = {}

-        options = {
-            "pageOptions": page_options,
-            "resultOptions": result_options
-        }
+        options = {"pageOptions": page_options, "resultOptions": result_options}
         return self.firecrawl.search(query, options)
@@ -1,7 +1,7 @@
 from typing import Any, List, Optional, Type

 from embedchain.loaders.github import GithubLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -27,7 +27,9 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema):

 class GithubSearchTool(RagTool):
     name: str = "Search a github repo's content"
-    description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
+    description: str = (
+        "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
+    )
     summarize: bool = False
     gh_token: str
     args_schema: Type[BaseModel] = GithubSearchToolSchema
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -1,50 +1,48 @@
-import os
-import json
-import requests
+from typing import Any, Optional, Type, cast
+from pydantic import BaseModel, Field

-from typing import Type, Any, cast, Optional
-from pydantic.v1 import BaseModel, Field
 from crewai_tools.tools.base_tool import BaseTool


 class LlamaIndexTool(BaseTool):
     """Tool to wrap LlamaIndex tools/query engines."""

     llama_index_tool: Any

     def _run(
         self,
         *args: Any,
         **kwargs: Any,
     ) -> Any:
         """Run tool."""
         from llama_index.core.tools import BaseTool as LlamaBaseTool

         tool = cast(LlamaBaseTool, self.llama_index_tool)
         return tool(*args, **kwargs)

     @classmethod
-    def from_tool(
-        cls,
-        tool: Any,
-        **kwargs: Any
-    ) -> "LlamaIndexTool":
+    def from_tool(cls, tool: Any, **kwargs: Any) -> "LlamaIndexTool":
         from llama_index.core.tools import BaseTool as LlamaBaseTool

         if not isinstance(tool, LlamaBaseTool):
             raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
         tool = cast(LlamaBaseTool, tool)

         if tool.metadata.fn_schema is None:
-            raise ValueError("The LlamaIndex tool does not have an fn_schema specified.")
+            raise ValueError(
+                "The LlamaIndex tool does not have an fn_schema specified."
+            )
         args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)

         return cls(
             name=tool.metadata.name,
             description=tool.metadata.description,
             args_schema=args_schema,
             llama_index_tool=tool,
-            **kwargs
+            **kwargs,
         )


     @classmethod
     def from_query_engine(
         cls,
@@ -52,7 +50,7 @@ class LlamaIndexTool(BaseTool):
         name: Optional[str] = None,
         description: Optional[str] = None,
         return_direct: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> "LlamaIndexTool":
         from llama_index.core.query_engine import BaseQueryEngine
         from llama_index.core.tools import QueryEngineTool
@@ -60,10 +58,11 @@ class LlamaIndexTool(BaseTool):
         if not isinstance(query_engine, BaseQueryEngine):
             raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")

         # NOTE: by default the schema expects an `input` variable. However this
         # confuses crewAI so we are renaming to `query`.
         class QueryToolSchema(BaseModel):
             """Schema for query tool."""
+
             query: str = Field(..., description="Search query for the query tool.")

         # NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`
@@ -72,13 +71,9 @@ class LlamaIndexTool(BaseTool):
             name=name,
             description=description,
             return_direct=return_direct,
             resolve_input_errors=True,
         )
         # HACK: we are replacing the schema with our custom schema
         query_engine_tool.metadata.fn_schema = QueryToolSchema

-        return cls.from_tool(
-            query_engine_tool,
-            **kwargs
-        )
-
+        return cls.from_tool(query_engine_tool, **kwargs)
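A hedged usage sketch for the wrapper above: `from_query_engine` takes a LlamaIndex query engine plus the optional name and description shown in the signature. The index construction is illustrative only and assumes llama-index is installed with documents already in a local "data" directory; the top-level export of `LlamaIndexTool` is also an assumption:

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

from crewai_tools import LlamaIndexTool  # assumed top-level export

documents = SimpleDirectoryReader("data").load_data()
query_engine = VectorStoreIndex.from_documents(documents).as_query_engine()

tool = LlamaIndexTool.from_query_engine(
    query_engine,
    name="Docs QA",
    description="Answers questions about the local document set",
)
print(tool.run(query="What does the report conclude?"))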
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -1,7 +1,7 @@
 from typing import Any, Type

 from embedchain.loaders.mysql import MySQLLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -17,7 +17,9 @@ class MySQLSearchToolSchema(BaseModel):

 class MySQLSearchTool(RagTool):
     name: str = "Search a database's table content"
-    description: str = "A tool that can be used to semantic search a query from a database table's content."
+    description: str = (
+        "A tool that can be used to semantic search a query from a database table's content."
+    )
     args_schema: Type[BaseModel] = MySQLSearchToolSchema
     db_uri: str = Field(..., description="Mandatory database URI")

@@ -1,8 +1,7 @@
 from typing import Any, Optional, Type

 from embedchain.models.data_type import DataType
-from pydantic import model_validator
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator

 from ..rag.rag_tool import RagTool

@@ -1,7 +1,7 @@
 from typing import Any, Type

 from embedchain.loaders.postgres import PostgresLoader
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field

 from ..rag.rag_tool import RagTool

@@ -17,7 +17,9 @@ class PGSearchToolSchema(BaseModel):

 class PGSearchTool(RagTool):
     name: str = "Search a database's table content"
-    description: str = "A tool that can be used to semantic search a query from a database table's content."
+    description: str = (
+        "A tool that can be used to semantic search a query from a database table's content."
+    )
     args_schema: Type[BaseModel] = PGSearchToolSchema
     db_uri: str = Field(..., description="Mandatory database URI")

@@ -1,57 +1,76 @@
 import os
+from typing import Any, Optional, Type
+
 import requests
 from bs4 import BeautifulSoup
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
 from ..base_tool import BaseTool


 class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
     """Input for ScrapeElementFromWebsiteTool."""
-    pass
+
+    pass


 class ScrapeElementFromWebsiteToolSchema(FixedScrapeElementFromWebsiteToolSchema):
     """Input for ScrapeElementFromWebsiteTool."""
-    website_url: str = Field(..., description="Mandatory website url to read the file")
-    css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
+
+    website_url: str = Field(..., description="Mandatory website url to read the file")
+    css_element: str = Field(
+        ...,
+        description="Mandatory css reference for element to scrape from the website",
+    )


 class ScrapeElementFromWebsiteTool(BaseTool):
     name: str = "Read a website content"
     description: str = "A tool that can be used to read a website content."
     args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema
     website_url: Optional[str] = None
     cookies: Optional[dict] = None
     css_element: Optional[str] = None
     headers: Optional[dict] = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
-        'Accept-Language': 'en-US,en;q=0.9',
-        'Referer': 'https://www.google.com/',
-        'Connection': 'keep-alive',
-        'Upgrade-Insecure-Requests': '1',
-        'Accept-Encoding': 'gzip, deflate, br'
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Referer": "https://www.google.com/",
+        "Connection": "keep-alive",
+        "Upgrade-Insecure-Requests": "1",
+        "Accept-Encoding": "gzip, deflate, br",
     }

-    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
-        super().__init__(**kwargs)
-        if website_url is not None:
-            self.website_url = website_url
-            self.css_element = css_element
-            self.description = f"A tool that can be used to read {website_url}'s content."
-            self.args_schema = FixedScrapeElementFromWebsiteToolSchema
-            self._generate_description()
-        if cookies is not None:
-            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
-
-    def _run(
-        self,
-        **kwargs: Any,
-    ) -> Any:
-        website_url = kwargs.get('website_url', self.website_url)
-        css_element = kwargs.get('css_element', self.css_element)
-        page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {})
-        parsed = BeautifulSoup(page.content, "html.parser")
-        elements = parsed.select(css_element)
-        return "\n".join([element.get_text() for element in elements])
+    def __init__(
+        self,
+        website_url: Optional[str] = None,
+        cookies: Optional[dict] = None,
+        css_element: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        if website_url is not None:
+            self.website_url = website_url
+            self.css_element = css_element
+            self.description = (
+                f"A tool that can be used to read {website_url}'s content."
+            )
+            self.args_schema = FixedScrapeElementFromWebsiteToolSchema
+            self._generate_description()
+        if cookies is not None:
+            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
+
+    def _run(
+        self,
+        **kwargs: Any,
+    ) -> Any:
+        website_url = kwargs.get("website_url", self.website_url)
+        css_element = kwargs.get("css_element", self.css_element)
+        page = requests.get(
+            website_url,
+            headers=self.headers,
+            cookies=self.cookies if self.cookies else {},
+        )
+        parsed = BeautifulSoup(page.content, "html.parser")
+        elements = parsed.select(css_element)
+        return "\n".join([element.get_text() for element in elements])
@@ -1,59 +1,73 @@
 import os
+from typing import Any, Optional, Type
+
 import requests
 from bs4 import BeautifulSoup
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
 from ..base_tool import BaseTool


 class FixedScrapeWebsiteToolSchema(BaseModel):
     """Input for ScrapeWebsiteTool."""
-    pass
+
+    pass


 class ScrapeWebsiteToolSchema(FixedScrapeWebsiteToolSchema):
     """Input for ScrapeWebsiteTool."""
-    website_url: str = Field(..., description="Mandatory website url to read the file")
+
+    website_url: str = Field(..., description="Mandatory website url to read the file")


 class ScrapeWebsiteTool(BaseTool):
     name: str = "Read website content"
     description: str = "A tool that can be used to read a website content."
     args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema
     website_url: Optional[str] = None
     cookies: Optional[dict] = None
     headers: Optional[dict] = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
-        'Accept-Language': 'en-US,en;q=0.9',
-        'Referer': 'https://www.google.com/',
-        'Connection': 'keep-alive',
-        'Upgrade-Insecure-Requests': '1'
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Referer": "https://www.google.com/",
+        "Connection": "keep-alive",
+        "Upgrade-Insecure-Requests": "1",
     }

-    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):
-        super().__init__(**kwargs)
-        if website_url is not None:
-            self.website_url = website_url
-            self.description = f"A tool that can be used to read {website_url}'s content."
-            self.args_schema = FixedScrapeWebsiteToolSchema
-            self._generate_description()
-        if cookies is not None:
-            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
+    def __init__(
+        self,
+        website_url: Optional[str] = None,
+        cookies: Optional[dict] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        if website_url is not None:
+            self.website_url = website_url
+            self.description = (
+                f"A tool that can be used to read {website_url}'s content."
+            )
+            self.args_schema = FixedScrapeWebsiteToolSchema
+            self._generate_description()
+        if cookies is not None:
+            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
-        website_url = kwargs.get('website_url', self.website_url)
+        website_url = kwargs.get("website_url", self.website_url)
         page = requests.get(
             website_url,
             timeout=15,
             headers=self.headers,
-            cookies=self.cookies if self.cookies else {}
+            cookies=self.cookies if self.cookies else {},
         )

         page.encoding = page.apparent_encoding
         parsed = BeautifulSoup(page.text, "html.parser")

         text = parsed.get_text()
-        text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
-        text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
+        text = "\n".join([i for i in text.split("\n") if i.strip() != ""])
+        text = " ".join([i for i in text.split(" ") if i.strip() != ""])
         return text
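A hedged usage sketch for the scraper above; fixing the URL at construction time switches the tool to the Fixed schema, so no arguments are needed at call time. The URL is a placeholder and the top-level import is assumed:

from crewai_tools import ScrapeWebsiteTool  # assumed top-level export

scraper = ScrapeWebsiteTool(website_url="https://example.com")
text = scraper._run()
print(text[:200])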
@@ -1,20 +1,31 @@
 import logging
+from typing import Any, Dict, Literal, Optional, Type

-from typing import Optional, Any, Type, Dict, Literal
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool

 logger = logging.getLogger(__file__)


 class ScrapflyScrapeWebsiteToolSchema(BaseModel):
     url: str = Field(description="Webpage URL")
-    scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(default="markdown", description="Webpage extraction format")
-    scrape_config: Optional[Dict[str, Any]] = Field(default=None, description="Scrapfly request scrape config")
-    ignore_scrape_failures: Optional[bool] = Field(default=None, description="whether to ignore failures")
+    scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field(
+        default="markdown", description="Webpage extraction format"
+    )
+    scrape_config: Optional[Dict[str, Any]] = Field(
+        default=None, description="Scrapfly request scrape config"
+    )
+    ignore_scrape_failures: Optional[bool] = Field(
+        default=None, description="whether to ignore failures"
+    )


 class ScrapflyScrapeWebsiteTool(BaseTool):
     name: str = "Scrapfly web scraping API tool"
-    description: str = "Scrape a webpage url using Scrapfly and return its content as markdown or text"
+    description: str = (
+        "Scrape a webpage url using Scrapfly and return its content as markdown or text"
+    )
     args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema
     api_key: str = None
     scrapfly: Optional[Any] = None
@@ -29,7 +40,13 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
         )
         self.scrapfly = ScrapflyClient(key=api_key)

-    def _run(self, url: str, scrape_format: str = "markdown", scrape_config: Optional[Dict[str, Any]] = None, ignore_scrape_failures: Optional[bool] = None):
+    def _run(
+        self,
+        url: str,
+        scrape_format: str = "markdown",
+        scrape_config: Optional[Dict[str, Any]] = None,
+        ignore_scrape_failures: Optional[bool] = None,
+    ):
         from scrapfly import ScrapeApiResponse, ScrapeConfig

         scrape_config = scrape_config if scrape_config is not None else {}
@@ -44,4 +61,3 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
                 return None
             else:
                 raise e
-
@@ -1,77 +1,94 @@
-from typing import Optional, Type, Any
 import time
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Optional, Type
 
 from bs4 import BeautifulSoup
+from pydantic import BaseModel, Field
 from selenium import webdriver
-from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
 
 from ..base_tool import BaseTool
 
 
 class FixedSeleniumScrapingToolSchema(BaseModel):
     """Input for SeleniumScrapingTool."""
+
     pass
 
 
 class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema):
     """Input for SeleniumScrapingTool."""
+
     website_url: str = Field(..., description="Mandatory website url to read the file")
-    css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
+    css_element: str = Field(
+        ...,
+        description="Mandatory css reference for element to scrape from the website",
+    )
 
 
 class SeleniumScrapingTool(BaseTool):
     name: str = "Read a website content"
     description: str = "A tool that can be used to read a website content."
     args_schema: Type[BaseModel] = SeleniumScrapingToolSchema
     website_url: Optional[str] = None
     driver: Optional[Any] = webdriver.Chrome
     cookie: Optional[dict] = None
     wait_time: Optional[int] = 3
     css_element: Optional[str] = None
 
-    def __init__(self, website_url: Optional[str] = None, cookie: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
+    def __init__(
+        self,
+        website_url: Optional[str] = None,
+        cookie: Optional[dict] = None,
+        css_element: Optional[str] = None,
+        **kwargs,
+    ):
         super().__init__(**kwargs)
         if cookie is not None:
             self.cookie = cookie
 
         if css_element is not None:
             self.css_element = css_element
 
         if website_url is not None:
             self.website_url = website_url
-            self.description = f"A tool that can be used to read {website_url}'s content."
+            self.description = (
+                f"A tool that can be used to read {website_url}'s content."
+            )
             self.args_schema = FixedSeleniumScrapingToolSchema
 
         self._generate_description()
+
     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
-        website_url = kwargs.get('website_url', self.website_url)
-        css_element = kwargs.get('css_element', self.css_element)
+        website_url = kwargs.get("website_url", self.website_url)
+        css_element = kwargs.get("css_element", self.css_element)
         driver = self._create_driver(website_url, self.cookie, self.wait_time)
 
         content = []
         if css_element is None or css_element.strip() == "":
             body_text = driver.find_element(By.TAG_NAME, "body").text
             content.append(body_text)
         else:
             for element in driver.find_elements(By.CSS_SELECTOR, css_element):
                 content.append(element.text)
         driver.close()
         return "\n".join(content)
 
     def _create_driver(self, url, cookie, wait_time):
         options = Options()
         options.add_argument("--headless")
         driver = self.driver(options=options)
         driver.get(url)
         time.sleep(wait_time)
         if cookie:
             driver.add_cookie(cookie)
             time.sleep(wait_time)
             driver.get(url)
             time.sleep(wait_time)
         return driver
 
     def close(self):
         self.driver.close()
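A minimal usage sketch for SeleniumScrapingTool as reformatted above. The constructor arguments come straight from the new `__init__` signature; the import path is an assumption, and Selenium needs a local Chrome/chromedriver installation to actually run.

# Hypothetical usage; values are illustrative only.
from crewai_tools import SeleniumScrapingTool

tool = SeleniumScrapingTool(
    website_url="https://example.com",
    css_element=".main-content",  # omit to fall back to the full <body> text
    cookie={"name": "session", "value": "abc123"},
)
print(tool._run())  # _run falls back to the values fixed in the constructor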
@@ -1,80 +1,94 @@
 import datetime
-import os
 import json
-import requests
+import os
+from typing import Any, Optional, Type
 
-from typing import Optional, Type, Any
-from pydantic.v1 import BaseModel, Field
+import requests
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool
 
 
 def _save_results_to_file(content: str) -> None:
     """Saves the search results to a file."""
     filename = f"search_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
-    with open(filename, 'w') as file:
+    with open(filename, "w") as file:
         file.write(content)
     print(f"Results saved to {filename}")
 
 
 class SerperDevToolSchema(BaseModel):
     """Input for SerperDevTool."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
+
+    search_query: str = Field(
+        ..., description="Mandatory search query you want to use to search the internet"
+    )
 
 
 class SerperDevTool(BaseTool):
     name: str = "Search the internet"
-    description: str = "A tool that can be used to search the internet with a search_query."
+    description: str = (
+        "A tool that can be used to search the internet with a search_query."
+    )
     args_schema: Type[BaseModel] = SerperDevToolSchema
     search_url: str = "https://google.serper.dev/search"
-    country: Optional[str] = ''
-    location: Optional[str] = ''
-    locale: Optional[str] = ''
+    country: Optional[str] = ""
+    location: Optional[str] = ""
+    locale: Optional[str] = ""
     n_results: int = 10
     save_file: bool = False
 
     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
-        search_query = kwargs.get('search_query') or kwargs.get('query')
-        save_file = kwargs.get('save_file', self.save_file)
-        n_results = kwargs.get('n_results', self.n_results)
+        search_query = kwargs.get("search_query") or kwargs.get("query")
+        save_file = kwargs.get("save_file", self.save_file)
+        n_results = kwargs.get("n_results", self.n_results)
 
-        payload = { "q": search_query, "num": n_results }
+        payload = {"q": search_query, "num": n_results}
 
-        if self.country != '':
+        if self.country != "":
             payload["gl"] = self.country
-        if self.location != '':
+        if self.location != "":
             payload["location"] = self.location
-        if self.locale != '':
+        if self.locale != "":
             payload["hl"] = self.locale
 
         payload = json.dumps(payload)
 
         headers = {
-            'X-API-KEY': os.environ['SERPER_API_KEY'],
-            'content-type': 'application/json'
+            "X-API-KEY": os.environ["SERPER_API_KEY"],
+            "content-type": "application/json",
         }
 
-        response = requests.request("POST", self.search_url, headers=headers, data=payload)
+        response = requests.request(
+            "POST", self.search_url, headers=headers, data=payload
+        )
         results = response.json()
 
-        if 'organic' in results:
-            results = results['organic'][:self.n_results]
+        if "organic" in results:
+            results = results["organic"][: self.n_results]
             string = []
             for result in results:
                 try:
-                    string.append('\n'.join([
-                        f"Title: {result['title']}",
-                        f"Link: {result['link']}",
-                        f"Snippet: {result['snippet']}",
-                        "---"
-                    ]))
+                    string.append(
+                        "\n".join(
+                            [
+                                f"Title: {result['title']}",
+                                f"Link: {result['link']}",
+                                f"Snippet: {result['snippet']}",
+                                "---",
+                            ]
+                        )
+                    )
                 except KeyError:
                     continue
 
-            content = '\n'.join(string)
+            content = "\n".join(string)
             if save_file:
                 _save_results_to_file(content)
             return f"\nSearch results: {content}\n"
         else:
             return results
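A minimal usage sketch for SerperDevTool; `_run` above reads SERPER_API_KEY from the environment, so the placeholder value and the `crewai_tools` import path are assumptions for illustration.

# Hypothetical usage of the tool defined in the hunk above.
import os

from crewai_tools import SerperDevTool

os.environ.setdefault("SERPER_API_KEY", "YOUR_SERPER_API_KEY")

tool = SerperDevTool(n_results=5, country="us")
print(tool._run(search_query="pydantic v2 migration"))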
@@ -1,19 +1,27 @@
 import os
-import requests
+from typing import Any, Optional, Type
 from urllib.parse import urlencode
-from typing import Type, Any, Optional
-from pydantic.v1 import BaseModel, Field
+
+import requests
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.rag.rag_tool import RagTool
 
 
 class SerplyJobSearchToolSchema(BaseModel):
     """Input for Job Search."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to fetch jobs postings.")
+
+    search_query: str = Field(
+        ...,
+        description="Mandatory search query you want to use to fetch jobs postings.",
+    )
 
 
 class SerplyJobSearchTool(RagTool):
     name: str = "Job Search"
-    description: str = "A tool to perform to perform a job search in the US with a search_query."
+    description: str = (
+        "A tool to perform to perform a job search in the US with a search_query."
+    )
     args_schema: Type[BaseModel] = SerplyJobSearchToolSchema
     request_url: str = "https://api.serply.io/v1/job/search/"
     proxy_location: Optional[str] = "US"
@@ -23,20 +31,17 @@ class SerplyJobSearchTool(RagTool):
     """
     headers: Optional[dict] = {}
 
-    def __init__(
-        self,
-        **kwargs
-    ):
+    def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.headers = {
             "X-API-KEY": os.environ["SERPLY_API_KEY"],
             "User-Agent": "crew-tools",
-            "X-Proxy-Location": self.proxy_location
+            "X-Proxy-Location": self.proxy_location,
         }
 
     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
         query_payload = {}
 
@@ -58,18 +63,22 @@ class SerplyJobSearchTool(RagTool):
         string = []
         for job in jobs:
             try:
-                string.append('\n'.join([
-                    f"Position: {job['position']}",
-                    f"Employer: {job['employer']}",
-                    f"Location: {job['location']}",
-                    f"Link: {job['link']}",
-                    f"""Highest: {', '.join([h for h in job['highlights']])}""",
-                    f"Is Remote: {job['is_remote']}",
-                    f"Is Hybrid: {job['is_remote']}",
-                    "---"
-                ]))
+                string.append(
+                    "\n".join(
+                        [
+                            f"Position: {job['position']}",
+                            f"Employer: {job['employer']}",
+                            f"Location: {job['location']}",
+                            f"Link: {job['link']}",
+                            f"""Highest: {', '.join([h for h in job['highlights']])}""",
+                            f"Is Remote: {job['is_remote']}",
+                            f"Is Hybrid: {job['is_remote']}",
+                            "---",
+                        ]
+                    )
+                )
             except KeyError:
                 continue
 
-        content = '\n'.join(string)
+        content = "\n".join(string)
         return f"\nSearch results: {content}\n"
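A minimal usage sketch for SerplyJobSearchTool; the headers built in `__init__` above require SERPLY_API_KEY, and `search_query` comes from the schema in this hunk. The import path and the key value are assumptions.

# Hypothetical usage; illustrative values only.
import os

from crewai_tools import SerplyJobSearchTool

os.environ.setdefault("SERPLY_API_KEY", "YOUR_SERPLY_API_KEY")

tool = SerplyJobSearchTool()
print(tool._run(search_query="senior python developer remote"))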
@@ -1,14 +1,19 @@
 import os
-import requests
+from typing import Any, Optional, Type
 from urllib.parse import urlencode
-from typing import Type, Any, Optional
-from pydantic.v1 import BaseModel, Field
+
+import requests
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool
 
 
 class SerplyNewsSearchToolSchema(BaseModel):
     """Input for Serply News Search."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to fetch news articles")
+
+    search_query: str = Field(
+        ..., description="Mandatory search query you want to use to fetch news articles"
+    )
 
 
 class SerplyNewsSearchTool(BaseTool):
@@ -21,15 +26,12 @@ class SerplyNewsSearchTool(BaseTool):
     limit: Optional[int] = 10
 
     def __init__(
-        self,
-        limit: Optional[int] = 10,
-        proxy_location: Optional[str] = "US",
-        **kwargs
+        self, limit: Optional[int] = 10, proxy_location: Optional[str] = "US", **kwargs
     ):
         """
         param: limit (int): The maximum number of results to return [10-100, defaults to 10]
            proxy_location: (str): Where to get news, specifically for a specific country results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
         """
         super().__init__(**kwargs)
         self.limit = limit
@@ -37,12 +39,12 @@ class SerplyNewsSearchTool(BaseTool):
         self.headers = {
             "X-API-KEY": os.environ["SERPLY_API_KEY"],
             "User-Agent": "crew-tools",
-            "X-Proxy-Location": proxy_location
+            "X-Proxy-Location": proxy_location,
         }
 
     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
         # build query parameters
         query_payload = {}
@@ -58,24 +60,28 @@ class SerplyNewsSearchTool(BaseTool):
         response = requests.request("GET", url, headers=self.headers)
         results = response.json()
         if "entries" in results:
-            results = results['entries']
+            results = results["entries"]
             string = []
-            for result in results[:self.limit]:
+            for result in results[: self.limit]:
                 try:
                     # follow url
-                    r = requests.get(result['link'])
-                    final_link = r.history[-1].headers['Location']
-                    string.append('\n'.join([
-                        f"Title: {result['title']}",
-                        f"Link: {final_link}",
-                        f"Source: {result['source']['title']}",
-                        f"Published: {result['published']}",
-                        "---"
-                    ]))
+                    r = requests.get(result["link"])
+                    final_link = r.history[-1].headers["Location"]
+                    string.append(
+                        "\n".join(
+                            [
+                                f"Title: {result['title']}",
+                                f"Link: {final_link}",
+                                f"Source: {result['source']['title']}",
+                                f"Published: {result['published']}",
+                                "---",
+                            ]
+                        )
+                    )
                 except KeyError:
                     continue
 
-            content = '\n'.join(string)
+            content = "\n".join(string)
             return f"\nSearch results: {content}\n"
         else:
             return results
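A minimal usage sketch for SerplyNewsSearchTool; `limit` and `proxy_location` mirror the collapsed constructor signature above, while the import path and key value are assumptions for illustration.

# Hypothetical usage; illustrative values only.
import os

from crewai_tools import SerplyNewsSearchTool

os.environ.setdefault("SERPLY_API_KEY", "YOUR_SERPLY_API_KEY")

tool = SerplyNewsSearchTool(limit=5, proxy_location="GB")
print(tool._run(search_query="open source LLM agents"))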
@@ -1,36 +1,39 @@
 import os
-import requests
+from typing import Any, Optional, Type
 from urllib.parse import urlencode
-from typing import Type, Any, Optional
-from pydantic.v1 import BaseModel, Field
+
+import requests
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool
 
 
 class SerplyScholarSearchToolSchema(BaseModel):
     """Input for Serply Scholar Search."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to fetch scholarly literature")
+
+    search_query: str = Field(
+        ...,
+        description="Mandatory search query you want to use to fetch scholarly literature",
+    )
 
 
 class SerplyScholarSearchTool(BaseTool):
     name: str = "Scholar Search"
-    description: str = "A tool to perform scholarly literature search with a search_query."
+    description: str = (
+        "A tool to perform scholarly literature search with a search_query."
+    )
     args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema
     search_url: str = "https://api.serply.io/v1/scholar/"
     hl: Optional[str] = "us"
     proxy_location: Optional[str] = "US"
     headers: Optional[dict] = {}
 
-    def __init__(
-        self,
-        hl: str = "us",
-        proxy_location: Optional[str] = "US",
-        **kwargs
-    ):
+    def __init__(self, hl: str = "us", proxy_location: Optional[str] = "US", **kwargs):
         """
         param: hl (str): host Language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
         proxy_location: (str): Specify the proxy location for the search, specifically for a specific country results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
         """
         super().__init__(**kwargs)
         self.hl = hl
@@ -38,16 +41,14 @@ class SerplyScholarSearchTool(BaseTool):
         self.headers = {
             "X-API-KEY": os.environ["SERPLY_API_KEY"],
             "User-Agent": "crew-tools",
-            "X-Proxy-Location": proxy_location
+            "X-Proxy-Location": proxy_location,
         }
 
     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
-        query_payload = {
-            "hl": self.hl
-        }
+        query_payload = {"hl": self.hl}
 
         if "query" in kwargs:
             query_payload["q"] = kwargs["query"]
@@ -67,20 +68,24 @@ class SerplyScholarSearchTool(BaseTool):
         for article in articles:
             try:
                 if "doc" in article:
-                    link = article['doc']['link']
+                    link = article["doc"]["link"]
                 else:
-                    link = article['link']
-                authors = [author['name'] for author in article['author']['authors']]
-                string.append('\n'.join([
-                    f"Title: {article['title']}",
-                    f"Link: {link}",
-                    f"Description: {article['description']}",
-                    f"Cite: {article['cite']}",
-                    f"Authors: {', '.join(authors)}",
-                    "---"
-                ]))
+                    link = article["link"]
+                authors = [author["name"] for author in article["author"]["authors"]]
+                string.append(
+                    "\n".join(
+                        [
+                            f"Title: {article['title']}",
+                            f"Link: {link}",
+                            f"Description: {article['description']}",
+                            f"Cite: {article['cite']}",
+                            f"Authors: {', '.join(authors)}",
+                            "---",
+                        ]
+                    )
+                )
             except KeyError:
                 continue
 
-        content = '\n'.join(string)
+        content = "\n".join(string)
         return f"\nSearch results: {content}\n"
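A minimal usage sketch for SerplyScholarSearchTool; `_run` above reads the query from kwargs["query"], and `hl` / `proxy_location` match the one-line constructor kept in this hunk. The import path and key value are assumptions.

# Hypothetical usage; illustrative values only.
import os

from crewai_tools import SerplyScholarSearchTool

os.environ.setdefault("SERPLY_API_KEY", "YOUR_SERPLY_API_KEY")

tool = SerplyScholarSearchTool(hl="en", proxy_location="US")
print(tool._run(query="retrieval augmented generation evaluation"))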
@@ -1,14 +1,19 @@
 import os
-import requests
+from typing import Any, Optional, Type
 from urllib.parse import urlencode
-from typing import Type, Any, Optional
-from pydantic.v1 import BaseModel, Field
+
+import requests
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.base_tool import BaseTool
 
 
 class SerplyWebSearchToolSchema(BaseModel):
     """Input for Serply Web Search."""
-    search_query: str = Field(..., description="Mandatory search query you want to use to Google search")
+
+    search_query: str = Field(
+        ..., description="Mandatory search query you want to use to Google search"
+    )
 
 
 class SerplyWebSearchTool(BaseTool):
@@ -24,21 +29,21 @@ class SerplyWebSearchTool(BaseTool):
     headers: Optional[dict] = {}
 
     def __init__(
         self,
         hl: str = "us",
         limit: int = 10,
         device_type: str = "desktop",
         proxy_location: str = "US",
-        **kwargs
+        **kwargs,
     ):
         """
         param: query (str): The query to search for
         param: hl (str): host Language code to display results in
            (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
         param: limit (int): The maximum number of results to return [10-100, defaults to 10]
         param: device_type (str): desktop/mobile results (defaults to desktop)
         proxy_location: (str): Where to perform the search, specifically for local/regional results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
         """
         super().__init__(**kwargs)
 
@@ -50,18 +55,18 @@ class SerplyWebSearchTool(BaseTool):
         self.query_payload = {
             "num": limit,
             "gl": proxy_location.upper(),
-            "hl": hl.lower()
+            "hl": hl.lower(),
         }
         self.headers = {
             "X-API-KEY": os.environ["SERPLY_API_KEY"],
             "X-User-Agent": device_type,
             "User-Agent": "crew-tools",
-            "X-Proxy-Location": proxy_location
+            "X-Proxy-Location": proxy_location,
         }
 
     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
         if "query" in kwargs:
             self.query_payload["q"] = kwargs["query"]
@@ -74,20 +79,24 @@ class SerplyWebSearchTool(BaseTool):
         response = requests.request("GET", url, headers=self.headers)
         results = response.json()
         if "results" in results:
-            results = results['results']
+            results = results["results"]
             string = []
             for result in results:
                 try:
-                    string.append('\n'.join([
-                        f"Title: {result['title']}",
-                        f"Link: {result['link']}",
-                        f"Description: {result['description'].strip()}",
-                        "---"
-                    ]))
+                    string.append(
+                        "\n".join(
+                            [
+                                f"Title: {result['title']}",
+                                f"Link: {result['link']}",
+                                f"Description: {result['description'].strip()}",
+                                "---",
+                            ]
+                        )
+                    )
                 except KeyError:
                     continue
 
-            content = '\n'.join(string)
+            content = "\n".join(string)
             return f"\nSearch results: {content}\n"
         else:
             return results
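A minimal usage sketch for SerplyWebSearchTool; the constructor arguments match the signature kept in this hunk, and `_run` reads the query from kwargs["query"]. The import path and key value are assumptions.

# Hypothetical usage; illustrative values only.
import os

from crewai_tools import SerplyWebSearchTool

os.environ.setdefault("SERPLY_API_KEY", "YOUR_SERPLY_API_KEY")

tool = SerplyWebSearchTool(hl="en", limit=10, device_type="desktop", proxy_location="US")
print(tool._run(query="crewAI pydantic v2 support"))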
@@ -1,48 +1,50 @@
 import os
+from typing import Any, Optional, Type
 
 import requests
-from typing import Type, Any, Optional
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
+
 from crewai_tools.tools.rag.rag_tool import RagTool
 
 
 class SerplyWebpageToMarkdownToolSchema(BaseModel):
     """Input for Serply Search."""
-    url: str = Field(..., description="Mandatory url you want to use to fetch and convert to markdown")
+
+    url: str = Field(
+        ...,
+        description="Mandatory url you want to use to fetch and convert to markdown",
+    )
 
 
 class SerplyWebpageToMarkdownTool(RagTool):
     name: str = "Webpage to Markdown"
-    description: str = "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
+    description: str = (
+        "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand"
+    )
     args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema
     request_url: str = "https://api.serply.io/v1/request"
     proxy_location: Optional[str] = "US"
     headers: Optional[dict] = {}
 
-    def __init__(
-        self,
-        proxy_location: Optional[str] = "US",
-        **kwargs
-    ):
+    def __init__(self, proxy_location: Optional[str] = "US", **kwargs):
         """
         proxy_location: (str): Where to perform the search, specifically for a specific country results.
            ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US)
         """
         super().__init__(**kwargs)
         self.proxy_location = proxy_location
         self.headers = {
             "X-API-KEY": os.environ["SERPLY_API_KEY"],
             "User-Agent": "crew-tools",
-            "X-Proxy-Location": proxy_location
+            "X-Proxy-Location": proxy_location,
         }
 
     def _run(
         self,
         **kwargs: Any,
     ) -> Any:
-        data = {
-            "url": kwargs["url"],
-            "method": "GET",
-            "response_type": "markdown"
-        }
-        response = requests.request("POST", self.request_url, headers=self.headers, json=data)
+        data = {"url": kwargs["url"], "method": "GET", "response_type": "markdown"}
+        response = requests.request(
+            "POST", self.request_url, headers=self.headers, json=data
+        )
         return response.text
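A minimal usage sketch for SerplyWebpageToMarkdownTool; `_run` above posts the url with response_type "markdown" to the Serply request endpoint. The import path and key value are assumptions.

# Hypothetical usage; illustrative values only.
import os

from crewai_tools import SerplyWebpageToMarkdownTool

os.environ.setdefault("SERPLY_API_KEY", "YOUR_SERPLY_API_KEY")

tool = SerplyWebpageToMarkdownTool(proxy_location="US")
print(tool._run(url="https://example.com/article"))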
@@ -1,21 +1,25 @@
-from typing import Optional, Any, Type, Dict, Literal
-from pydantic.v1 import BaseModel, Field
+from typing import Any, Dict, Literal, Optional, Type
+
+from pydantic import BaseModel, Field
 
 from crewai_tools.tools.base_tool import BaseTool
 
 
 class SpiderToolSchema(BaseModel):
     url: str = Field(description="Website URL")
     params: Optional[Dict[str, Any]] = Field(
         description="Set additional params. Options include:\n"
         "- `limit`: Optional[int] - The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.\n"
         "- `depth`: Optional[int] - The crawl limit for maximum depth. If `0`, no limit will be applied.\n"
         "- `metadata`: Optional[bool] - Boolean to include metadata or not. Defaults to `False` unless set to `True`. If the user wants metadata, include params.metadata = True.\n"
         "- `query_selector`: Optional[str] - The CSS query selector to use when extracting content from the markup.\n"
     )
     mode: Literal["scrape", "crawl"] = Field(
         default="scrape",
-        description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set."
+        description="Mode, the only two allowed modes are `scrape` or `crawl`. Use `scrape` to scrape a single page and `crawl` to crawl the entire website following subpages. These modes are the only allowed values even when ANY params is set.",
     )
 
 
 class SpiderTool(BaseTool):
     name: str = "Spider scrape & crawl tool"
     description: str = "Scrape & Crawl any url and return LLM-ready data."
@@ -26,11 +30,11 @@ class SpiderTool(BaseTool):
     def __init__(self, api_key: Optional[str] = None, **kwargs):
         super().__init__(**kwargs)
         try:
             from spider import Spider  # type: ignore
         except ImportError:
             raise ImportError(
                 "`spider-client` package not found, please run `pip install spider-client`"
             )
 
         self.spider = Spider(api_key=api_key)
 
@@ -38,7 +42,7 @@ class SpiderTool(BaseTool):
         self,
         url: str,
         params: Optional[Dict[str, Any]] = None,
-        mode: Optional[Literal["scrape", "crawl"]] = "scrape"
+        mode: Optional[Literal["scrape", "crawl"]] = "scrape",
     ):
         if mode not in ["scrape", "crawl"]:
             raise ValueError(
@@ -51,9 +55,7 @@ class SpiderTool(BaseTool):
         else:
             params = {"return_format": "markdown"}
 
-        action = (
-            self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
-        )
+        action = self.spider.scrape_url if mode == "scrape" else self.spider.crawl_url
         spider_docs = action(url=url, params=params)
 
         return spider_docs
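A minimal usage sketch for SpiderTool; it needs `pip install spider-client`, as the ImportError message above states, plus a Spider API key. The import path and the example params dict are assumptions for illustration.

# Hypothetical usage; illustrative values only.
from crewai_tools import SpiderTool

tool = SpiderTool(api_key="YOUR_SPIDER_API_KEY")

# Scrape a single page (default mode; _run above defaults params to markdown output).
page = tool._run(url="https://example.com", mode="scrape")

# Crawl the site, passing explicit params through to the Spider client.
site = tool._run(
    url="https://example.com",
    params={"limit": 5, "return_format": "markdown"},
    mode="crawl",
)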
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type
 
 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
 
 from ..rag.rag_tool import RagTool
 
@@ -2,9 +2,10 @@ import base64
 from typing import Type
 
 import requests
-from crewai_tools.tools.base_tool import BaseTool
 from openai import OpenAI
-from pydantic.v1 import BaseModel
+from pydantic import BaseModel
+
+from crewai_tools.tools.base_tool import BaseTool
 
 
 class ImagePromptSchema(BaseModel):
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type
 
 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
 
 from ..rag.rag_tool import RagTool
 
@@ -25,7 +25,9 @@ class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema):
 
 class WebsiteSearchTool(RagTool):
     name: str = "Search in a specific website"
-    description: str = "A tool that can be used to semantic search a query from a specific URL content."
+    description: str = (
+        "A tool that can be used to semantic search a query from a specific URL content."
+    )
     args_schema: Type[BaseModel] = WebsiteSearchToolSchema
 
     def __init__(self, website: Optional[str] = None, **kwargs):
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type
 
 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
 
 from ..rag.rag_tool import RagTool
 
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type
 
 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
 
 from ..rag.rag_tool import RagTool
 
@@ -25,7 +25,9 @@ class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema):
 
 class YoutubeChannelSearchTool(RagTool):
     name: str = "Search a Youtube Channels content"
-    description: str = "A tool that can be used to semantic search a query from a Youtube Channels content."
+    description: str = (
+        "A tool that can be used to semantic search a query from a Youtube Channels content."
+    )
     args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema
 
     def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs):
@@ -1,7 +1,7 @@
 from typing import Any, Optional, Type
 
 from embedchain.models.data_type import DataType
-from pydantic.v1 import BaseModel, Field
+from pydantic import BaseModel, Field
 
 from ..rag.rag_tool import RagTool
 
@@ -25,7 +25,9 @@ class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema):
 
 class YoutubeVideoSearchTool(RagTool):
     name: str = "Search a Youtube Video content"
-    description: str = "A tool that can be used to semantic search a query from a Youtube Video content."
+    description: str = (
+        "A tool that can be used to semantic search a query from a Youtube Video content."
+    )
     args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema
 
     def __init__(self, youtube_video_url: Optional[str] = None, **kwargs):
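The remaining hunks swap `from pydantic.v1 import ...` for plain `from pydantic import ...` across the RAG-based tools and wrap their long description strings. A minimal sketch of what a custom args schema looks like under the new pydantic v2 convention; the class and field names here are illustrative only.

# Illustrative only: a custom schema written against pydantic v2, matching the
# imports introduced in these hunks.
from pydantic import BaseModel, Field


class MyToolSchema(BaseModel):
    """Input for a hypothetical custom tool."""

    search_query: str = Field(..., description="Mandatory search query")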