mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 16:48:30 +00:00
adding RAG specific readme and implementing specific helpers
This commit is contained in:
64
src/crewai_tools/tools/rag/README.md
Normal file
64
src/crewai_tools/tools/rag/README.md
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
# RagTool: A Dynamic Knowledge Base Tool
|
||||||
|
|
||||||
|
RagTool is designed to answer questions by leveraging the power of Retrieval-Augmented Generation (RAG) through EmbedChain. It integrates seamlessly with the CrewAI ecosystem, offering a versatile and powerful solution for information retrieval.
|
||||||
|
|
||||||
|
## **Overview**
|
||||||
|
|
||||||
|
RagTool enables users to dynamically query a knowledge base, making it an ideal tool for applications requiring access to a vast array of information. Its flexible design allows for integration with various data sources, including files, directories, web pages, YouTube videos, and custom configurations.
|
||||||
|
|
||||||
|
## **Usage**
|
||||||
|
|
||||||
|
RagTool can be instantiated with data from different sources, including:
|
||||||
|
|
||||||
|
- 📰 PDF file
|
||||||
|
- 📊 CSV file
|
||||||
|
- 📃 JSON file
|
||||||
|
- 📝 Text
|
||||||
|
- 📁 Directory/Folder
|
||||||
|
- 🌐 HTML Web page
|
||||||
|
- 📽️ YouTube Channel
|
||||||
|
- 📺 YouTube Video
|
||||||
|
- 📚 Docs website
|
||||||
|
- 📝 MDX file
|
||||||
|
- 📄 DOCX file
|
||||||
|
- 🧾 XML file
|
||||||
|
- 📬 Gmail
|
||||||
|
- 📝 GitHub
|
||||||
|
- 🐘 Postgres
|
||||||
|
- 🐬 MySQL
|
||||||
|
- 🤖 Slack
|
||||||
|
- 💬 Discord
|
||||||
|
- 🗨️ Discourse
|
||||||
|
- 📝 Substack
|
||||||
|
- 🐝 Beehiiv
|
||||||
|
- 💾 Dropbox
|
||||||
|
- 🖼️ Image
|
||||||
|
- ⚙️ Custom
|
||||||
|
|
||||||
|
#### **Creating an Instance**
|
||||||
|
|
||||||
|
```python
|
||||||
|
from crewai_tools.tools.rag_tool import RagTool
|
||||||
|
|
||||||
|
# Example: Loading from a file
|
||||||
|
rag_tool = RagTool().from_file('path/to/your/file.txt')
|
||||||
|
|
||||||
|
# Example: Loading from a directory
|
||||||
|
rag_tool = RagTool().from_directory('path/to/your/directory')
|
||||||
|
|
||||||
|
# Example: Loading from a web page
|
||||||
|
rag_tool = RagTool().from_web_page('https://example.com')
|
||||||
|
|
||||||
|
# Example: Loading from an Embedchain configuration
|
||||||
|
rag_tool = RagTool().from_embedchain('path/to/your/config.json')
|
||||||
|
```
|
||||||
|
|
||||||
|
## **Contribution**
|
||||||
|
|
||||||
|
Contributions to RagTool and the broader CrewAI tools ecosystem are welcome. To contribute, please follow the standard GitHub workflow for forking the repository, making changes, and submitting a pull request.
|
||||||
|
|
||||||
|
## **License**
|
||||||
|
|
||||||
|
RagTool is open-source and available under the MIT license.
|
||||||
|
|
||||||
|
Thank you for considering RagTool for your knowledge base needs. Your contributions and feedback are invaluable to making RagTool even better.
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any
|
from typing import Any, List
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
@@ -13,7 +13,6 @@ class Adapter(BaseModel, ABC):
|
|||||||
def query(self, question: str) -> str:
|
def query(self, question: str) -> str:
|
||||||
"""Query the knowledge base with a question and return the answer."""
|
"""Query the knowledge base with a question and return the answer."""
|
||||||
|
|
||||||
|
|
||||||
class RagTool(BaseTool):
|
class RagTool(BaseTool):
|
||||||
name: str = "Knowledge base"
|
name: str = "Knowledge base"
|
||||||
description: str = "A knowledge base that can be used to answer questions."
|
description: str = "A knowledge base that can be used to answer questions."
|
||||||
@@ -52,23 +51,100 @@ class RagTool(BaseTool):
|
|||||||
adapter = EmbedchainAdapter(embedchain_app=app)
|
adapter = EmbedchainAdapter(embedchain_app=app)
|
||||||
return RagTool(adapter=adapter)
|
return RagTool(adapter=adapter)
|
||||||
|
|
||||||
def from_web_page(self, url: str):
|
def from_pg_db(self, db_uri: str, table_name: str):
|
||||||
from embedchain import App
|
from embedchain import App
|
||||||
from embedchain.models.data_type import DataType
|
from embedchain.loaders.postgres import PostgresLoader
|
||||||
|
|
||||||
from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
|
from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
|
||||||
|
|
||||||
|
config = { "url": db_uri }
|
||||||
|
postgres_loader = PostgresLoader(config=config)
|
||||||
app = App()
|
app = App()
|
||||||
app.add(url, data_type=DataType.WEB_PAGE)
|
app.add(
|
||||||
|
f"SELECT * FROM {table_name};",
|
||||||
|
data_type='postgres',
|
||||||
|
loader=postgres_loader
|
||||||
|
)
|
||||||
adapter = EmbedchainAdapter(embedchain_app=app)
|
adapter = EmbedchainAdapter(embedchain_app=app)
|
||||||
return RagTool(adapter=adapter)
|
return RagTool(adapter=adapter)
|
||||||
|
|
||||||
|
|
||||||
|
def from_github_repo(self, gh_token: str, gh_repo: str, type: List[str] = ["repo"]):
    """Build a RagTool backed by content loaded from a GitHub repository.

    Args:
        gh_token: Token placed in the ``GithubLoader`` config under ``"token"``.
        gh_repo: Repository identifier inserted after ``repo:`` in the query.
        type: Content types, comma-joined and inserted after ``type:`` in
            the query. Defaults to ``["repo"]``.

    Returns:
        A new ``RagTool`` wrapping an ``EmbedchainAdapter`` around the
        populated embedchain ``App``.
    """
    from embedchain import App
    from embedchain.loaders.github import GithubLoader

    from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter

    loader = GithubLoader(config={"token": gh_token})

    # Join outside the f-string: the original nested double quotes inside a
    # double-quoted f-string (a syntax error before Python 3.12) and called
    # the non-existent ``str.joing`` instead of ``str.join``.
    # The default list is only read here, never mutated, so sharing it
    # across calls is harmless.
    content_types = ",".join(type)

    app = App()
    app.add(
        f"repo:{gh_repo} type:{content_types}",
        data_type="github",
        loader=loader,
    )
    adapter = EmbedchainAdapter(embedchain_app=app)
    return RagTool(adapter=adapter)
|
||||||
|
|
||||||
|
def from_xml_file(self, file_url: str):
    """Create a RagTool from an XML source.

    Delegates to ``_from_generic`` with ``DataType.XML``.
    """
    from embedchain.models.data_type import DataType

    xml_type = DataType.XML
    return self._from_generic(file_url, xml_type)
|
||||||
|
|
||||||
|
def from_docx_file(self, file_url: str):
    """Create a RagTool from a DOCX source.

    Delegates to ``_from_generic`` with ``DataType.DOCX``.
    """
    from embedchain.models.data_type import DataType

    docx_type = DataType.DOCX
    return self._from_generic(file_url, docx_type)
|
||||||
|
|
||||||
|
# NOTE(review): an identical ``from_docx_file`` is defined immediately above.
# Because this definition comes later in the class body, it is the one that
# takes effect; one of the two can likely be removed.
def from_docx_file(self, file_url: str):
    """Create a RagTool from a DOCX source.

    Delegates to ``_from_generic`` with ``DataType.DOCX``.
    """
    from embedchain.models.data_type import DataType

    docx_type = DataType.DOCX
    return self._from_generic(file_url, docx_type)
|
||||||
|
|
||||||
|
def from_mdx_file(self, file_url: str):
    """Create a RagTool from an MDX source.

    Delegates to ``_from_generic`` with ``DataType.MDX``.
    """
    from embedchain.models.data_type import DataType

    mdx_type = DataType.MDX
    return self._from_generic(file_url, mdx_type)
|
||||||
|
|
||||||
|
def from_code_docs(self, docs_url: str):
    """Create a RagTool from a documentation site URL.

    Delegates to ``_from_generic`` with ``DataType.DOCS_SITE``.
    """
    from embedchain.models.data_type import DataType

    docs_type = DataType.DOCS_SITE
    return self._from_generic(docs_url, docs_type)
|
||||||
|
|
||||||
|
def from_youtube_channel(self, channel_handle: str):
    """Create a RagTool from a YouTube channel handle.

    A leading ``@`` is prepended when the handle does not already start
    with one, then the call is delegated to ``_from_generic`` with
    ``DataType.YOUTUBE_CHANNEL``.
    """
    from embedchain.models.data_type import DataType

    handle = (
        channel_handle
        if channel_handle.startswith("@")
        else f"@{channel_handle}"
    )
    return self._from_generic(handle, DataType.YOUTUBE_CHANNEL)
|
||||||
|
|
||||||
|
def from_website(self, url: str):
    """Create a RagTool from a web page URL.

    Delegates to ``_from_generic`` with ``DataType.WEB_PAGE``.
    """
    from embedchain.models.data_type import DataType

    page_type = DataType.WEB_PAGE
    return self._from_generic(url, page_type)
|
||||||
|
|
||||||
|
def from_text(self, text: str):
    """Create a RagTool from a raw text string.

    Delegates to ``_from_generic`` with ``DataType.TEXT``.
    """
    from embedchain.models.data_type import DataType

    text_type = DataType.TEXT
    return self._from_generic(text, text_type)
|
||||||
|
|
||||||
|
def from_json(self, file_path: str):
    """Create a RagTool from a JSON source.

    Delegates to ``_from_generic`` with ``DataType.JSON``.
    """
    from embedchain.models.data_type import DataType

    json_type = DataType.JSON
    return self._from_generic(file_path, json_type)
|
||||||
|
|
||||||
|
def from_csv(self, file_path: str):
    """Create a RagTool from a CSV source.

    Delegates to ``_from_generic`` with ``DataType.CSV``.
    """
    from embedchain.models.data_type import DataType

    csv_type = DataType.CSV
    return self._from_generic(file_path, csv_type)
|
||||||
|
|
||||||
|
def from_pdf(self, file_path: str):
    """Create a RagTool from a PDF source.

    Delegates to ``_from_generic`` with ``DataType.PDF_FILE``.
    """
    from embedchain.models.data_type import DataType

    pdf_type = DataType.PDF_FILE
    return self._from_generic(file_path, pdf_type)
|
||||||
|
|
||||||
|
def from_web_page(self, url: str):
    """Create a RagTool from a web page URL.

    Delegates to ``_from_generic`` with ``DataType.WEB_PAGE``.
    """
    from embedchain.models.data_type import DataType

    page_type = DataType.WEB_PAGE
    return self._from_generic(url, page_type)
|
||||||
|
|
||||||
def from_embedchain(self, config_path: str):
|
def from_embedchain(self, config_path: str):
|
||||||
from embedchain import App
|
from embedchain import App
|
||||||
|
|
||||||
from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
|
from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter
|
||||||
|
|
||||||
app = App.from_config(config_path=config_path)
|
app = App.from_config(config_path=config_path)
|
||||||
adapter = EmbedchainAdapter(embedchain_app=app)
|
adapter = EmbedchainAdapter(embedchain_app=app)
|
||||||
return RagTool(adapter=adapter)
|
return RagTool(adapter=adapter)
|
||||||
|
|
||||||
|
def _from_generic(self, source: str, type: str):
    """Shared constructor used by the ``from_*`` helpers.

    Creates a fresh embedchain ``App``, adds ``source`` to it with
    ``data_type=type``, and returns a new ``RagTool`` whose adapter is an
    ``EmbedchainAdapter`` wrapping that app.
    """
    from embedchain import App

    from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter

    embedchain_app = App()
    embedchain_app.add(source, data_type=type)
    return RagTool(adapter=EmbedchainAdapter(embedchain_app=embedchain_app))
|
||||||
Reference in New Issue
Block a user