diff --git a/src/crewai_tools/tools/rag/README.md b/src/crewai_tools/tools/rag/README.md new file mode 100644 index 000000000..c65daca16 --- /dev/null +++ b/src/crewai_tools/tools/rag/README.md @@ -0,0 +1,64 @@ +# RagTool: A Dynamic Knowledge Base Tool + +RagTool is designed to answer questions by leveraging the power of RAG through EmbedChain. It integrates seamlessly with the CrewAI ecosystem, offering a versatile and powerful solution for information retrieval. + +## **Overview** + +RagTool enables users to dynamically query a knowledge base, making it an ideal tool for applications requiring access to a vast array of information. Its flexible design allows for integration with various data sources, including files, directories, web pages, YouTube videos and custom configurations. + +## **Usage** + +RagTool can be instantiated with data from different sources, including: + +- 📰 PDF file +- 📊 CSV file +- 📃 JSON file +- 📝 Text +- 📁 Directory/ Folder +- 🌐 HTML Web page +- 📽️ YouTube Channel +- 📺 YouTube Video +- 📚 Docs website +- 📝 MDX file +- 📄 DOCX file +- 🧾 XML file +- 📬 Gmail +- 📝 GitHub +- 🐘 Postgres +- 🐬 MySQL +- 🤖 Slack +- 💬 Discord +- 🗨️ Discourse +- 📝 Substack +- 🐝 Beehiiv +- 💾 Dropbox +- 🖼️ Image +- ⚙️ Custom + +#### **Creating an Instance** + +```python +from crewai_tools.tools.rag.rag_tool import RagTool + +# Example: Loading from a file +rag_tool = RagTool().from_file('path/to/your/file.txt') + +# Example: Loading from a directory +rag_tool = RagTool().from_directory('path/to/your/directory') + +# Example: Loading from a web page +rag_tool = RagTool().from_web_page('https://example.com') + +# Example: Loading from an Embedchain configuration +rag_tool = RagTool().from_embedchain('path/to/your/config.json') +``` + +## **Contribution** + +Contributions to RagTool and the broader CrewAI tools ecosystem are welcome. 
To contribute, please follow the standard GitHub workflow for forking the repository, making changes, and submitting a pull request. + +## **License** + +RagTool is open-source and available under the MIT license. + +Thank you for considering RagTool for your knowledge base needs. Your contributions and feedback are invaluable to making RagTool even better. \ No newline at end of file diff --git a/src/crewai_tools/tools/rag/rag_tool.py b/src/crewai_tools/tools/rag/rag_tool.py index 5ef616795..df86d2a5f 100644 --- a/src/crewai_tools/tools/rag/rag_tool.py +++ b/src/crewai_tools/tools/rag/rag_tool.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any +from typing import Any, List from pydantic import BaseModel, ConfigDict @@ -13,7 +13,6 @@ class Adapter(BaseModel, ABC): def query(self, question: str) -> str: """Query the knowledge base with a question and return the answer.""" - class RagTool(BaseTool): name: str = "Knowledge base" description: str = "A knowledge base that can be used to answer questions." 
@@ -52,23 +51,149 @@ class RagTool(BaseTool):
         adapter = EmbedchainAdapter(embedchain_app=app)
         return RagTool(adapter=adapter)
 
-    def from_web_page(self, url: str):
+    def from_pg_db(self, db_uri: str, table_name: str):
+        """Build a RagTool over every row of a PostgreSQL table.
+
+        Args:
+            db_uri: Connection URL handed to ``PostgresLoader``.
+            table_name: Table to read; may be schema-qualified
+                (``schema.table``).
+
+        Returns:
+            A new ``RagTool`` backed by an ``EmbedchainAdapter``.
+
+        Raises:
+            ValueError: If ``table_name`` is not a plain, optionally
+                dotted, identifier.
+        """
         from embedchain import App
-        from embedchain.models.data_type import DataType
-
+        from embedchain.loaders.postgres import PostgresLoader
         from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
 
+        # The table name is interpolated into the SQL below, so accept only
+        # plain identifiers to keep arbitrary SQL out of the query.
+        if not all(part.isidentifier() for part in table_name.split(".")):
+            raise ValueError(f"Invalid table name: {table_name!r}")
+
+        postgres_loader = PostgresLoader(config={"url": db_uri})
         app = App()
-        app.add(url, data_type=DataType.WEB_PAGE)
-
+        app.add(
+            f"SELECT * FROM {table_name};",
+            data_type="postgres",
+            loader=postgres_loader,
+        )
         adapter = EmbedchainAdapter(embedchain_app=app)
         return RagTool(adapter=adapter)
 
+    def from_github_repo(self, gh_token: str, gh_repo: str, type: List[str] = ["repo"]):
+        """Build a RagTool over content fetched from a GitHub repository.
+
+        Args:
+            gh_token: Token passed to Embedchain's ``GithubLoader``.
+            gh_repo: Repository name placed after ``repo:`` in the query.
+            type: Content types, comma-joined after ``type:`` in the query.
+
+        Returns:
+            A new ``RagTool`` backed by an ``EmbedchainAdapter``.
+        """
+        from embedchain import App
+        from embedchain.loaders.github import GithubLoader
+        from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
+
+        # Join outside the f-string: reusing the same quote character inside
+        # an f-string expression is a syntax error before Python 3.12. The
+        # default list is only read here, never mutated.
+        content_types = ",".join(type)
+        loader = GithubLoader(config={"token": gh_token})
+        app = App()
+        app.add(
+            f"repo:{gh_repo} type:{content_types}",
+            data_type="github",
+            loader=loader,
+        )
+        adapter = EmbedchainAdapter(embedchain_app=app)
+        return RagTool(adapter=adapter)
+
+    def from_xml_file(self, file_url: str):
+        """Build a RagTool from the XML file at ``file_url``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(file_url, DataType.XML)
+
+    def from_docx_file(self, file_url: str):
+        """Build a RagTool from the DOCX file at ``file_url``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(file_url, DataType.DOCX)
+
+    def from_mdx_file(self, file_url: str):
+        """Build a RagTool from the MDX file at ``file_url``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(file_url, DataType.MDX)
+
+    def from_code_docs(self, docs_url: str):
+        """Build a RagTool from the documentation site at ``docs_url``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(docs_url, DataType.DOCS_SITE)
+
+    def from_youtube_channel(self, channel_handle: str):
+        """Build a RagTool from a YouTube channel handle."""
+        from embedchain.models.data_type import DataType
+        # The handle is passed on with a leading "@"; add it when missing.
+        if not channel_handle.startswith("@"):
+            channel_handle = f"@{channel_handle}"
+        return self._from_generic(channel_handle, DataType.YOUTUBE_CHANNEL)
+
+    def from_website(self, url: str):
+        """Build a RagTool from the web page at ``url``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(url, DataType.WEB_PAGE)
+
+    def from_text(self, text: str):
+        """Build a RagTool from a raw text string."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(text, DataType.TEXT)
+
+    def from_json(self, file_path: str):
+        """Build a RagTool from the JSON file at ``file_path``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(file_path, DataType.JSON)
+
+    def from_csv(self, file_path: str):
+        """Build a RagTool from the CSV file at ``file_path``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(file_path, DataType.CSV)
+
+    def from_pdf(self, file_path: str):
+        """Build a RagTool from the PDF file at ``file_path``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(file_path, DataType.PDF_FILE)
+
+    def from_web_page(self, url: str):
+        """Build a RagTool from the web page at ``url``."""
+        from embedchain.models.data_type import DataType
+        return self._from_generic(url, DataType.WEB_PAGE)
+
     def from_embedchain(self, config_path: str):
         from embedchain import App
-
         from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
 
         app = App.from_config(config_path=config_path)
         adapter = EmbedchainAdapter(embedchain_app=app)
         return RagTool(adapter=adapter)
+
+    def _from_generic(self, source: str, type: str):
+        """Build a RagTool from one source added to a new Embedchain app.
+
+        Args:
+            source: Value passed to ``App.add`` as the data source.
+            type: Value passed to ``App.add`` as ``data_type``.
+
+        Returns:
+            A new ``RagTool`` backed by an ``EmbedchainAdapter``.
+        """
+        from embedchain import App
+        from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
+
+        app = App()
+        app.add(source, data_type=type)
+        adapter = EmbedchainAdapter(embedchain_app=app)
+        return RagTool(adapter=adapter)