diff --git a/src/crewai_tools/__init__.py b/src/crewai_tools/__init__.py index 09d10d88f..a6a269be6 100644 --- a/src/crewai_tools/__init__.py +++ b/src/crewai_tools/__init__.py @@ -1 +1,19 @@ from .tools.base_tool import BaseTool, Tool, as_tool, tool +from .tools import ( + CodeDocsSearchTool, + CSVSearchTool, + DirectorySearchTool, + DOCXSearchTool, + FileReadTool, + GithubSearchTool, + TXTSearchTool, + JSONSearchTool, + MDXSearchTool, + PDFSearchTool, + PGSearchTool, + RagTool, + WebsiteSearchTool, + XMLSearchTool, + YoutubeChannelSearchTool, + YoutubeVideoSearchTool, +) \ No newline at end of file diff --git a/src/crewai_tools/adapters/embedchain_adapter.py b/src/crewai_tools/adapters/embedchain_adapter.py index cdb7f1d5a..16491fb25 100644 --- a/src/crewai_tools/adapters/embedchain_adapter.py +++ b/src/crewai_tools/adapters/embedchain_adapter.py @@ -1,14 +1,12 @@ -from embedchain import App - +from typing import Any from crewai_tools.tools.rag.rag_tool import Adapter - class EmbedchainAdapter(Adapter): - embedchain_app: App - dry_run: bool = False + embedchain_app: Any + summarize: bool = False def query(self, question: str) -> str: - result = self.embedchain_app.query(question, dry_run=self.dry_run) - if result is list: - return "\n".join(result) - return str(result) + result, sources = self.embedchain_app.query(question, citations=True, dry_run=(not self.summarize)) + if self.summarize: + return result + return "\n\n".join([source[0] for source in sources]) diff --git a/src/crewai_tools/tools/__init__.py b/src/crewai_tools/tools/__init__.py index 1628a6bca..2910185ec 100644 --- a/src/crewai_tools/tools/__init__.py +++ b/src/crewai_tools/tools/__init__.py @@ -1 +1,17 @@ -from .rag.rag_tool import RagTool \ No newline at end of file +from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool +from .csv_search_tool.csv_search_tool import CSVSearchTool +from .directory_search_tool.directory_search_tool import DirectorySearchTool +from 
.docx_search_tool.docx_search_tool import DOCXSearchTool +from .file_read_tool.file_read_tool import FileReadTool +from .github_search_tool.github_search_tool import GithubSearchTool +from .serper_dev_tool.serper_dev_tool import SeperDevTool +from .txt_search_tool.txt_search_tool import TXTSearchTool +from .json_search_tool.json_search_tool import JSONSearchTool +from .mdx_seach_tool.mdx_search_tool import MDXSearchTool +from .pdf_search_tool.pdf_search_tool import PDFSearchTool +from .pg_seach_tool.pg_search_tool import PGSearchTool +from .rag.rag_tool import RagTool +from .website_search.website_search_tool import WebsiteSearchTool +from .xml_search_tool.xml_search_tool import XMLSearchTool +from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool +from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool \ No newline at end of file diff --git a/src/crewai_tools/tools/base_tool.py b/src/crewai_tools/tools/base_tool.py index eadef2368..e2fb18395 100644 --- a/src/crewai_tools/tools/base_tool.py +++ b/src/crewai_tools/tools/base_tool.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Callable, cast +from typing import Any, Callable, cast, Optional, Type from langchain.agents import tools as langchain_tools from pydantic import BaseModel @@ -10,12 +10,15 @@ class BaseTool(BaseModel, ABC): """The unique name of the tool that clearly communicates its purpose.""" description: str """Used to tell the model how/when/why to use the tool.""" + args_schema: Optional[Type[BaseModel]] = None + """The schema for the arguments that the tool accepts.""" def run( self, *args: Any, **kwargs: Any, ) -> Any: + print(f"Using Tool: {self.name}") return self._run(*args, **kwargs) @abstractmethod @@ -30,6 +33,7 @@ class BaseTool(BaseModel, ABC): return langchain_tools.Tool( name=self.name, description=self.description, + args_schema=self.args_schema, func=self._run, ) diff --git 
a/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py b/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py new file mode 100644 index 000000000..fd0acf4ca --- /dev/null +++ b/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedCodeDocsSearchToolSchema(BaseModel): + """Input for CodeDocsSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the Code Docs content") + +class CodeDocsSearchToolSchema(FixedCodeDocsSearchToolSchema): + """Input for CodeDocsSearchTool.""" + docs_url: str = Field(..., description="Mandatory docs_url path you want to search") + +class CodeDocsSearchTool(RagTool): + name: str = "Search a Code Docs content" + description: str = "A tool that can be used to semantic search a query from a Code Docs content." + summarize: bool = False + args_schema: Type[BaseModel] = CodeDocsSearchToolSchema + docs_url: Optional[str] = None + + def __init__(self, docs_url: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if docs_url is not None: + self.docs_url = docs_url + self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content." 
+ self.args_schema = FixedCodeDocsSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + docs_url = kwargs.get('docs_url', self.docs_url) + self.app = App() + self.app.add(docs_url, data_type=DataType.DOCS_SITE) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py b/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py new file mode 100644 index 000000000..8cc06e263 --- /dev/null +++ b/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedCSVSearchToolSchema(BaseModel): + """Input for CSVSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the CSV's content") + +class CSVSearchToolSchema(FixedCSVSearchToolSchema): + """Input for CSVSearchTool.""" + pdf: str = Field(..., description="Mandatory csv path you want to search") + +class CSVSearchTool(RagTool): + name: str = "Search a CSV's content" + description: str = "A tool that can be used to semantic search a query from a CSV's content." + summarize: bool = False + args_schema: Type[BaseModel] = CSVSearchToolSchema + csv: Optional[str] = None + + def __init__(self, csv: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if csv is not None: + self.csv = csv + self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content." 
+ self.args_schema = FixedCSVSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + csv = kwargs.get('csv', self.csv) + self.app = App() + self.app.add(csv, data_type=DataType.CSV) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py b/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py new file mode 100644 index 000000000..39c34fc93 --- /dev/null +++ b/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py @@ -0,0 +1,41 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.loaders.directory_loader import DirectoryLoader + +from ..rag.rag_tool import RagTool + + +class FixedDirectorySearchToolSchema(BaseModel): + """Input for DirectorySearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the directory's content") + +class DirectorySearchToolSchema(FixedDirectorySearchToolSchema): + """Input for DirectorySearchTool.""" + directory: str = Field(..., description="Mandatory directory you want to search") + +class DirectorySearchTool(RagTool): + name: str = "Search a directory's content" + description: str = "A tool that can be used to semantic search a query from a directory's content." + summarize: bool = False + args_schema: Type[BaseModel] = DirectorySearchToolSchema + directory: Optional[str] = None + + def __init__(self, directory: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if directory is not None: + self.directory = directory + self.description = f"A tool that can be used to semantic search a query the {directory} directory's content." 
+ self.args_schema = FixedDirectorySearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + directory = kwargs.get('directory', self.directory) + loader = DirectoryLoader(config=dict(recursive=True)) + self.app = App() + self.app.add(directory, loader=loader) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py b/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py new file mode 100644 index 000000000..1a52e5f3b --- /dev/null +++ b/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedDOCXSearchToolSchema(BaseModel): + """Input for DOCXSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the DOCX's content") + +class DOCXSearchToolSchema(FixedDOCXSearchToolSchema): + """Input for DOCXSearchTool.""" + docx: str = Field(..., description="Mandatory docx path you want to search") + +class DOCXSearchTool(RagTool): + name: str = "Search a DOCX's content" + description: str = "A tool that can be used to semantic search a query from a DOCX's content." + summarize: bool = False + args_schema: Type[BaseModel] = DOCXSearchToolSchema + docx: Optional[str] = None + + def __init__(self, docx: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if docx is not None: + self.docx = docx + self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content." 
+ self.args_schema = FixedDOCXSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + docx = kwargs.get('docx', self.docx) + self.app = App() + self.app.add(docx, data_type=DataType.DOCX) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/src/crewai_tools/tools/file_read_tool/file_read_tool.py new file mode 100644 index 000000000..0721ec127 --- /dev/null +++ b/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -0,0 +1,32 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field +from ..base_tool import BaseTool + +class FixedFileReadToolSchema(BaseModel): + """Input for FileReadTool.""" + pass + +class FileReadToolSchema(FixedFileReadToolSchema): + """Input for FileReadTool.""" + file_path: str = Field(..., description="Mandatory file path to read the file") + +class FileReadTool(BaseTool): + name: str = "Read a file's content" + description: str = "A tool that can be used to read a file's content." + args_schema: Type[BaseModel] = FileReadToolSchema + file_path: Optional[str] = None + + def __init__(self, file_path: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if file_path is not None: + self.file_path = file_path + self.description = f"A tool that can be used to read {file_path}'s content." 
+ self.args_schema = FixedFileReadToolSchema + + def _run( + self, + **kwargs: Any, + ) -> Any: + file_path = kwargs.get('file_path', self.file_path) + with open(file_path, 'r') as file: + return file.read() \ No newline at end of file diff --git a/src/crewai_tools/tools/github_search_tool/github_search_tool.py b/src/crewai_tools/tools/github_search_tool/github_search_tool.py new file mode 100644 index 000000000..7b6066e00 --- /dev/null +++ b/src/crewai_tools/tools/github_search_tool/github_search_tool.py @@ -0,0 +1,44 @@ +from typing import Optional, Type, List, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.loaders.github import GithubLoader + +from ..rag.rag_tool import RagTool + + +class FixedGithubSearchToolSchema(BaseModel): + """Input for GithubSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the github repo's content") + +class GithubSearchToolSchema(FixedGithubSearchToolSchema): + """Input for GithubSearchTool.""" + github_repo: str = Field(..., description="Mandatory github you want to search") + content_types: List[str] = Field(..., description="Mandatory content types you want to be inlcuded search, options: [code, repo, pr, issue]") + +class GithubSearchTool(RagTool): + name: str = "Search a github repo's content" + description: str = "A tool that can be used to semantic search a query from a github repo's content." + summarize: bool = False + gh_token: str = None + args_schema: Type[BaseModel] = GithubSearchToolSchema + github_repo: Optional[str] = None + content_types: List[str] + + def __init__(self, github_repo: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if github_repo is not None: + self.github_repo = github_repo + self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content." 
+ self.args_schema = FixedGithubSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + github_repo = kwargs.get('github_repo', self.github_repo) + loader = GithubLoader(config={"token": self.gh_token}) + app = App() + app.add(f"repo:{github_repo} type:{','.join(self.content_types)}", data_type="github", loader=loader) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/json_search_tool/json_search_tool.py b/src/crewai_tools/tools/json_search_tool/json_search_tool.py new file mode 100644 index 000000000..89e515e78 --- /dev/null +++ b/src/crewai_tools/tools/json_search_tool/json_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedJSONSearchToolSchema(BaseModel): + """Input for JSONSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the JSON's content") + +class JSONSearchToolSchema(FixedJSONSearchToolSchema): + """Input for JSONSearchTool.""" + json_path: str = Field(..., description="Mandatory json path you want to search") + +class JSONSearchTool(RagTool): + name: str = "Search a JSON's content" + description: str = "A tool that can be used to semantic search a query from a JSON's content." + summarize: bool = False + args_schema: Type[BaseModel] = JSONSearchToolSchema + json_path: Optional[str] = None + + def __init__(self, json_path: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if json_path is not None: + self.json_path = json_path + self.description = f"A tool that can be used to semantic search a query the {json} JSON's content." 
+ self.args_schema = FixedJSONSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + json_path = kwargs.get('json_path', self.json_path) + self.app = App() + self.app.add(json_path, data_type=DataType.JSON) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py b/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py new file mode 100644 index 000000000..0f4deb056 --- /dev/null +++ b/src/crewai_tools/tools/mdx_seach_tool/mdx_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedMDXSearchToolSchema(BaseModel): + """Input for MDXSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the MDX's content") + +class MDXSearchToolSchema(FixedMDXSearchToolSchema): + """Input for MDXSearchTool.""" + mdx: str = Field(..., description="Mandatory mdx path you want to search") + +class MDXSearchTool(RagTool): + name: str = "Search a MDX's content" + description: str = "A tool that can be used to semantic search a query from a MDX's content." + summarize: bool = False + args_schema: Type[BaseModel] = MDXSearchToolSchema + mdx: Optional[str] = None + + def __init__(self, mdx: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if mdx is not None: + self.mdx = mdx + self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content." 
+ self.args_schema = FixedMDXSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + mdx = kwargs.get('mdx', self.mdx) + self.app = App() + self.app.add(mdx, data_type=DataType.MDX) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py b/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py new file mode 100644 index 000000000..ba54e34ca --- /dev/null +++ b/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedPDFSearchToolSchema(BaseModel): + """Input for PDFSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the PDF's content") + +class PDFSearchToolSchema(FixedPDFSearchToolSchema): + """Input for PDFSearchTool.""" + pdf: str = Field(..., description="Mandatory pdf path you want to search") + +class PDFSearchTool(RagTool): + name: str = "Search a PDF's content" + description: str = "A tool that can be used to semantic search a query from a PDF's content." + summarize: bool = False + args_schema: Type[BaseModel] = PDFSearchToolSchema + pdf: Optional[str] = None + + def __init__(self, pdf: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if pdf is not None: + self.pdf = pdf + self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content." 
+ self.args_schema = FixedPDFSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + pdf = kwargs.get('pdf', self.pdf) + self.app = App() + self.app.add(pdf, data_type=DataType.PDF_FILE) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py b/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py new file mode 100644 index 000000000..f625bebc9 --- /dev/null +++ b/src/crewai_tools/tools/pg_seach_tool/pg_search_tool.py @@ -0,0 +1,44 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.loaders.postgres import PostgresLoader + +from ..rag.rag_tool import RagTool + +class PGSearchToolSchema(BaseModel): + """Input for PGSearchTool.""" + search_query: str = Field(..., description="Mandatory semantic search query you want to use to search the database's content") + +class PGSearchTool(RagTool): + name: str = "Search a database's table content" + description: str = "A tool that can be used to semantic search a query from a database table's content." + summarize: bool = False + args_schema: Type[BaseModel] = PGSearchToolSchema + db_uri: str = Field(..., description="Mandatory database URI") + table_name: str = Field(..., description="Mandatory table name") + search_query: str = Field(..., description="Mandatory semantic search query you want to use to search the database's content") + + def __init__(self, table_name: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if table_name is not None: + self.table_name = table_name + self.description = f"A tool that can be used to semantic search a query the {table_name} database table's content." 
+ else: + raise('To use PGSearchTool, you must provide a `table_name` argument') + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + + config = { "url": self.db_uri } + postgres_loader = PostgresLoader(config=config) + app = App() + app.add( + f"SELECT * FROM {self.table_name};", + data_type='postgres', + loader=postgres_loader + ) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/rag/rag_tool.py b/src/crewai_tools/tools/rag/rag_tool.py index 4e81df5af..3901129ff 100644 --- a/src/crewai_tools/tools/rag/rag_tool.py +++ b/src/crewai_tools/tools/rag/rag_tool.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Any, List, Optional -from pydantic import BaseModel, ConfigDict +from pydantic.v1 import BaseModel, ConfigDict from crewai_tools.tools.base_tool import BaseTool @@ -14,125 +14,20 @@ class Adapter(BaseModel, ABC): """Query the knowledge base with a question and return the answer.""" class RagTool(BaseTool): + model_config = ConfigDict(arbitrary_types_allowed=True) name: str = "Knowledge base" description: str = "A knowledge base that can be used to answer questions." 
+ summarize: bool = False adapter: Optional[Adapter] = None + app: Optional[Any] = None def _run( self, - *args: Any, - **kwargs: Any, + query: str, ) -> Any: - return self.adapter.query(args[0]) - - def from_file(self, file_path: str): - from embedchain import App - from embedchain.models.data_type import DataType - from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter - - app = App() - app.add(file_path, data_type=DataType.TEXT_FILE) - - adapter = EmbedchainAdapter(embedchain_app=app) - return RagTool(adapter=adapter) - - def from_directory(self, directory_path: str): - from embedchain import App - from embedchain.loaders.directory_loader import DirectoryLoader - - from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter - - loader = DirectoryLoader(config=dict(recursive=True)) - - app = App() - app.add(directory_path, loader=loader) - - adapter = EmbedchainAdapter(embedchain_app=app) - return RagTool(adapter=adapter) - - def from_pg_db(self, db_uri: str, table_name: str): - from embedchain import App - from embedchain.models.data_type import DataType - from embedchain.loaders.postgres import PostgresLoader - from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter - - config = { "url": db_uri } - postgres_loader = PostgresLoader(config=config) - app = App() - app.add( - f"SELECT * FROM {table_name};", - data_type='postgres', - loader=postgres_loader - ) - adapter = EmbedchainAdapter(embedchain_app=app) - return RagTool(adapter=adapter) - - - def from_github_repo(self, gh_token: str, gh_repo: str, type: List[str] = ["repo"]): - from embedchain import App - from embedchain.loaders.github import GithubLoader - from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter - - loader = GithubLoader( - config={ - "token": gh_token, - } - ) - app = App() - app.add(f"repo:{gh_repo} type:{','.join(type)}", data_type="github", loader=loader) - adapter = EmbedchainAdapter(embedchain_app=app) - return 
RagTool(adapter=adapter) - - def from_xml_file(self, file_url: str): - from embedchain.models.data_type import DataType - return self._from_generic(file_url, DataType.XML) - - def from_docx_file(self, file_url: str): - from embedchain.models.data_type import DataType - return self._from_generic(file_url, DataType.DOCX) - - def from_docx_file(self, file_url: str): - from embedchain.models.data_type import DataType - return self._from_generic(file_url, DataType.DOCX) - - def from_mdx_file(self, file_url: str): - from embedchain.models.data_type import DataType - return self._from_generic(file_url, DataType.MDX) - - def from_code_docs(self, docs_url: str): - from embedchain.models.data_type import DataType - return self._from_generic(docs_url, DataType.DOCS_SITE) - - def from_youtube_channel(self, channel_handle: str): - from embedchain.models.data_type import DataType - if not channel_handle.startswith("@"): - channel_handle = f"@{channel_handle}" - return self._from_generic(channel_handle, DataType.YOUTUBE_CHANNEL) - - def from_website(self, url: str): - from embedchain.models.data_type import DataType - return self._from_generic(url, DataType.WEB_PAGE) - - def from_text(self, text: str): - from embedchain.models.data_type import DataType - return self._from_generic(text, DataType.TEXT) - - def from_json(self, file_path: str): - from embedchain.models.data_type import DataType - return self._from_generic(file_path, DataType.JSON) - - def from_csv(self, file_path: str): - from embedchain.models.data_type import DataType - return self._from_generic(file_path, DataType.CSV) - - def from_pdf(self, file_path: str): - from embedchain.models.data_type import DataType - return self._from_generic(file_path, DataType.PDF_FILE) - - def from_web_page(self, url: str): - from embedchain.models.data_type import DataType - return self._from_generic(url, DataType.WEB_PAGE) + self.adapter = EmbedchainAdapter(embedchain_app=self.app, summarize=self.summarize) + return f"Relevant 
Content:\n{self.adapter.query(query)}" def from_embedchain(self, config_path: str): from embedchain import App @@ -140,12 +35,4 @@ class RagTool(BaseTool): app = App.from_config(config_path=config_path) adapter = EmbedchainAdapter(embedchain_app=app) - return RagTool(name=self.name, description=self.description, adapter=adapter) - - def _from_generic(self, source: str, type: str): - from embedchain import App - from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter - app = App() - app.add(source, data_type=type) - adapter = EmbedchainAdapter(embedchain_app=app) return RagTool(name=self.name, description=self.description, adapter=adapter) \ No newline at end of file diff --git a/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py new file mode 100644 index 000000000..d4a886a73 --- /dev/null +++ b/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -0,0 +1,47 @@ +import os +import json +import requests + +from typing import Type, Any +from pydantic.v1 import BaseModel, Field +from crewai_tools.tools.base_tool import BaseTool + +class SeperDevToolSchema(BaseModel): + """Input for TXTSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the internet") + +class SeperDevTool(BaseTool): + name: str = "Search the internet" + description: str = "A tool that can be used to semantic search a query from a txt's content." 
+ args_schema: Type[BaseModel] = SeperDevToolSchema + search_url: str = "https://google.serper.dev/search" + n_results: int = None + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + payload = json.dumps({"q": search_query}) + headers = { + 'X-API-KEY': os.environ['SERPER_API_KEY'], + 'content-type': 'application/json' + } + response = requests.request("POST", self.search_url, headers=headers, data=payload) + results = response.json()['organic'] + stirng = [] + for result in results: + print(result) + print('--------------') + try: + stirng.append('\n'.join([ + f"Title: {result['title']}", + f"Link: {result['link']}", + f"Snippet: {result['snippet']}", + "---" + ])) + except KeyError: + next + + content = '\n'.join(stirng) + return f"\nSearch results: {content}\n" diff --git a/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py b/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py new file mode 100644 index 000000000..130f6f164 --- /dev/null +++ b/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py @@ -0,0 +1,39 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + +class FixedTXTSearchToolSchema(BaseModel): + """Input for TXTSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the txt's content") + +class TXTSearchToolSchema(FixedTXTSearchToolSchema): + """Input for TXTSearchTool.""" + txt: str = Field(..., description="Mandatory txt path you want to search") + +class TXTSearchTool(RagTool): + name: str = "Search a txt's content" + description: str = "A tool that can be used to semantic search a query from a txt's content." 
+ summarize: bool = False + args_schema: Type[BaseModel] = TXTSearchToolSchema + txt: Optional[str] = None + + def __init__(self, txt: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if txt is not None: + self.txt = txt + self.description = f"A tool that can be used to semantic search a query the {txt} txt's content." + self.args_schema = FixedTXTSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + txt = kwargs.get('txt', self.txt) + self.app = App() + self.app.add(txt, data_type=DataType.TEXT_FILE) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/website_search/website_search_tool.py b/src/crewai_tools/tools/website_search/website_search_tool.py new file mode 100644 index 000000000..f4cffa9c9 --- /dev/null +++ b/src/crewai_tools/tools/website_search/website_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedWebsiteSearchToolSchema(BaseModel): + """Input for WebsiteSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search a specific website") + +class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema): + """Input for WebsiteSearchTool.""" + website: str = Field(..., description="Mandatory valid website URL you want to search on") + +class WebsiteSearchTool(RagTool): + name: str = "Search in a specific website" + description: str = "A tool that can be used to semantic search a query from a specific URL content." 
+ summarize: bool = False + args_schema: Type[BaseModel] = WebsiteSearchToolSchema + website: Optional[str] = None + + def __init__(self, website: Optional[str] = None, **kwargs): + super().__init__(**kwargs) + if website is not None: + self.website = website + self.description = f"A tool that can be used to semantic search a query from {website} website content." + self.args_schema = FixedWebsiteSearchToolSchema + + def _run( + self, + search_query: str, + **kwargs: Any, + ) -> Any: + website = kwargs.get('website', self.website) + self.app = App() + self.app.add(website, data_type=DataType.WEB_PAGE) + return super()._run(query=search_query) \ No newline at end of file diff --git a/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py b/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py new file mode 100644 index 000000000..9259b819f --- /dev/null +++ b/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py @@ -0,0 +1,40 @@ +from typing import Optional, Type, Any +from pydantic.v1 import BaseModel, Field + +from embedchain import App +from embedchain.models.data_type import DataType + +from ..rag.rag_tool import RagTool + + +class FixedXMLSearchToolSchema(BaseModel): + """Input for XMLSearchTool.""" + search_query: str = Field(..., description="Mandatory search query you want to use to search the XML's content") + +class XMLSearchToolSchema(FixedXMLSearchToolSchema): + """Input for XMLSearchTool.""" + xml: str = Field(..., description="Mandatory xml path you want to search") + +class XMLSearchTool(RagTool): + name: str = "Search a XML's content" + description: str = "A tool that can be used to semantic search a query from a XML's content." 
+    summarize: bool = False
+    args_schema: Type[BaseModel] = XMLSearchToolSchema
+    xml: Optional[str] = None
+
+    def __init__(self, xml: Optional[str] = None, **kwargs):
+        """Optionally pin the tool to ``xml``, switching to the query-only ``FixedXMLSearchToolSchema``."""
+        super().__init__(**kwargs)
+        if xml is not None:
+            self.xml = xml
+            self.description = f"A tool that can be used to semantic search a query from the {xml} XML's content."
+            self.args_schema = FixedXMLSearchToolSchema
+
+    def _run(self, search_query: str, **kwargs: Any) -> Any:
+        """Add the target XML file to a fresh ``App``, then delegate ``search_query`` to the parent ``_run``."""
+        xml = kwargs.get('xml') or self.xml  # a per-call value wins; otherwise fall back to the pinned one
+        if not xml:  # fail early with a clear message instead of handing ``None`` to ``App.add``
+            raise ValueError("No XML file to search: pass `xml` to the tool or to this call.")
+        self.app = App()
+        self.app.add(xml, data_type=DataType.XML)
+        return super()._run(query=search_query)
\ No newline at end of file
diff --git a/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py b/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py
new file mode 100644
index 000000000..9b4e51688
--- /dev/null
+++ b/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py
@@ -0,0 +1,42 @@
+from typing import Optional, Type, Any
+from pydantic.v1 import BaseModel, Field
+
+from embedchain import App
+from embedchain.models.data_type import DataType
+
+from ..rag.rag_tool import RagTool
+
+
+class FixedYoutubeChannelSearchToolSchema(BaseModel):
+    """Input for YoutubeChannelSearchTool."""
+    search_query: str = Field(..., description="Mandatory search query you want to use to search the Youtube Channels content")
+
+class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema):
+    """Input for YoutubeChannelSearchTool."""
+    youtube_channel_handle: str = Field(..., description="Mandatory youtube_channel_handle path you want to search")
+
+class YoutubeChannelSearchTool(RagTool):
+    name: str = "Search a Youtube Channels content"
+    description: str = "A tool that can be used to semantic search a query from a Youtube Channels content."
+    summarize: bool = False
+    args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema
+    youtube_channel_handle: Optional[str] = None
+
+    def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs):
+        """Optionally pin the tool to one channel, switching to the query-only ``FixedYoutubeChannelSearchToolSchema``."""
+        super().__init__(**kwargs)
+        if youtube_channel_handle is not None:
+            self.youtube_channel_handle = youtube_channel_handle
+            self.description = f"A tool that can be used to semantic search a query from the {youtube_channel_handle} Youtube Channels content."
+            self.args_schema = FixedYoutubeChannelSearchToolSchema
+
+    def _run(self, search_query: str, **kwargs: Any) -> Any:
+        """Add the target channel to a fresh ``App``, then delegate ``search_query`` to the parent ``_run``."""
+        youtube_channel_handle = kwargs.get('youtube_channel_handle') or self.youtube_channel_handle
+        if not youtube_channel_handle:  # guard before ``.startswith`` so a missing handle fails with a clear error
+            raise ValueError("No Youtube channel to search: pass `youtube_channel_handle` to the tool or to this call.")
+        if not youtube_channel_handle.startswith("@"):
+            youtube_channel_handle = f"@{youtube_channel_handle}"
+        self.app = App()
+        self.app.add(youtube_channel_handle, data_type=DataType.YOUTUBE_CHANNEL)
+        return super()._run(query=search_query)
\ No newline at end of file
diff --git a/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py b/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py
new file mode 100644
index 000000000..7b26c8e90
--- /dev/null
+++ b/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py
@@ -0,0 +1,40 @@
+from typing import Optional, Type, Any
+from pydantic.v1 import BaseModel, Field
+
+from embedchain import App
+from embedchain.models.data_type import DataType
+
+from ..rag.rag_tool import RagTool
+
+
+class FixedYoutubeVideoSearchToolSchema(BaseModel):
+    """Input for YoutubeVideoSearchTool."""
+    search_query: str = Field(..., description="Mandatory search query you want to use to search the Youtube Video content")
+
+class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema):
+    """Input for YoutubeVideoSearchTool."""
+    youtube_video_url: str = Field(..., description="Mandatory youtube_video_url path you want to search")
+
+class YoutubeVideoSearchTool(RagTool):
+    name: str = "Search a Youtube Video content"
+    description: str = "A tool that can be used to semantic search a query from a Youtube Video content."
+    summarize: bool = False
+    args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema
+    youtube_video_url: Optional[str] = None
+
+    def __init__(self, youtube_video_url: Optional[str] = None, **kwargs):
+        """Optionally pin the tool to one video, switching to the query-only ``FixedYoutubeVideoSearchToolSchema``."""
+        super().__init__(**kwargs)
+        if youtube_video_url is not None:
+            self.youtube_video_url = youtube_video_url
+            self.description = f"A tool that can be used to semantic search a query from the {youtube_video_url} Youtube Video content."
+            self.args_schema = FixedYoutubeVideoSearchToolSchema
+
+    def _run(self, search_query: str, **kwargs: Any) -> Any:
+        """Add the target video to a fresh ``App``, then delegate ``search_query`` to the parent ``_run``."""
+        youtube_video_url = kwargs.get('youtube_video_url') or self.youtube_video_url  # per-call value wins over the pinned one
+        if not youtube_video_url:  # fail early with a clear message instead of handing ``None`` to ``App.add``
+            raise ValueError("No Youtube video to search: pass `youtube_video_url` to the tool or to this call.")
+        self.app = App()
+        self.app.add(youtube_video_url, data_type=DataType.YOUTUBE_VIDEO)
+        return super()._run(query=search_query)
\ No newline at end of file