mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-08 15:48:29 +00:00
Merge pull request #142 from crewAIInc/feat/weaviate-tool
setup weaviate vector search tool
This commit is contained in:
@@ -42,4 +42,5 @@ from .tools import (
|
||||
XMLSearchTool,
|
||||
YoutubeChannelSearchTool,
|
||||
YoutubeVideoSearchTool,
|
||||
WeaviateVectorSearchTool,
|
||||
)
|
||||
|
||||
@@ -51,3 +51,4 @@ from .youtube_channel_search_tool.youtube_channel_search_tool import (
|
||||
YoutubeChannelSearchTool,
|
||||
)
|
||||
from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
|
||||
from .weaviate_tool.vector_search import WeaviateVectorSearchTool
|
||||
|
||||
80
src/crewai_tools/tools/weaviate_tool/README.md
Normal file
80
src/crewai_tools/tools/weaviate_tool/README.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# WeaviateVectorSearchTool
|
||||
|
||||
## Description
|
||||
This tool is designed for performing semantic searches over documents stored in a Weaviate vector database. Use it to find documents that are semantically similar to a given query.
|
||||
|
||||
Weaviate is a vector database that is used to store and query vector embeddings. You can follow their docs here: https://weaviate.io/developers/wcs/connect
|
||||
|
||||
## Installation
|
||||
Install the crewai_tools package by executing the following command in your terminal:
|
||||
|
||||
```shell
|
||||
uv pip install 'crewai[tools]'
|
||||
```
|
||||
|
||||
## Example
|
||||
To utilize the WeaviateVectorSearchTool for different use cases, follow these examples:
|
||||
|
||||
```python
|
||||
from crewai import Agent
from crewai_tools import WeaviateVectorSearchTool
from weaviate.classes.config import Configure
|
||||
|
||||
# Search an existing Weaviate collection using the default vectorizer and generative model
|
||||
tool = WeaviateVectorSearchTool(
|
||||
collection_name='example_collections',
|
||||
limit=3,
|
||||
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
|
||||
weaviate_api_key="your-weaviate-api-key",
|
||||
)
|
||||
|
||||
# or
|
||||
|
||||
# Set up a custom vectorizer and generative model
|
||||
tool = WeaviateVectorSearchTool(
|
||||
collection_name='example_collections',
|
||||
limit=3,
|
||||
vectorizer=Configure.Vectorizer.text2vec_openai(model="nomic-embed-text"),
|
||||
generative_model=Configure.Generative.openai(model="gpt-4o-mini"),
|
||||
weaviate_cluster_url="https://your-weaviate-cluster-url.com",
|
||||
weaviate_api_key="your-weaviate-api-key",
|
||||
)
|
||||
|
||||
# Adding the tool to an agent
|
||||
rag_agent = Agent(
|
||||
name="rag_agent",
|
||||
role="You are a helpful assistant that can answer questions with the help of the WeaviateVectorSearchTool.",
|
||||
llm="gpt-4o-mini",
|
||||
tools=[tool],
|
||||
)
|
||||
```
|
||||
|
||||
## Arguments
|
||||
- `collection_name` : The name of the collection to search within. (Required)
|
||||
- `weaviate_cluster_url` : The URL of the Weaviate cluster. (Required)
|
||||
- `weaviate_api_key` : The API key for the Weaviate cluster. (Required)
|
||||
- `limit` : The number of results to return. (Optional)
|
||||
- `vectorizer` : The vectorizer to use. (Optional)
|
||||
- `generative_model` : The generative model to use. (Optional)
|
||||
|
||||
Preloading the Weaviate database with documents:
|
||||
|
||||
```python
|
||||
from crewai_tools import WeaviateVectorSearchTool
|
||||
|
||||
# Use before hooks to generate the documents and add them to the Weaviate database. Follow the weaviate docs: https://weaviate.io/developers/wcs/connect
|
||||
test_docs = client.collections.get("example_collections")
|
||||
|
||||
|
||||
docs_to_load = os.listdir("knowledge")
|
||||
with test_docs.batch.dynamic() as batch:
|
||||
for d in docs_to_load:
|
||||
with open(os.path.join("knowledge", d), "r") as f:
|
||||
content = f.read()
|
||||
batch.add_object(
|
||||
{
|
||||
"content": content,
|
||||
"year": d.split("_")[0],
|
||||
}
|
||||
)
|
||||
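tool = WeaviateVectorSearchTool(
    collection_name='example_collections',
    limit=3,
    weaviate_cluster_url="https://your-weaviate-cluster-url.com",
    weaviate_api_key="your-weaviate-api-key",
)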
|
||||
|
||||
```
|
||||
89
src/crewai_tools/tools/weaviate_tool/vector_search.py
Normal file
89
src/crewai_tools/tools/weaviate_tool/vector_search.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import json
import os
from typing import Any, Optional, Type

import weaviate
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from weaviate.classes.config import Configure, Vectorizers
from weaviate.classes.init import Auth
|
||||
|
||||
|
||||
class WeaviateToolSchema(BaseModel):
    """Input for WeaviateTool."""

    # The only argument the agent supplies when invoking the tool: the text
    # that is sent to Weaviate as the semantic (near-text) search query.
    query: str = Field(
        default=...,
        description=(
            "The query to search retrieve relevant information from the "
            "Weaviate database. Pass only the query, not the question."
        ),
    )
|
||||
|
||||
|
||||
def _default_openai_headers() -> Optional[dict]:
    """Build the default Weaviate request headers from the environment.

    The lookup happens when a tool instance is created rather than when the
    module is imported, so importing the package no longer fails with a
    ``KeyError`` when ``OPENAI_API_KEY`` is not set.

    Returns:
        A dict carrying the ``X-OpenAI-Api-Key`` header, or ``None`` when the
        environment variable is missing or empty.
    """
    openai_api_key = os.environ.get("OPENAI_API_KEY")
    if not openai_api_key:
        return None
    return {"X-OpenAI-Api-Key": openai_api_key}


class WeaviateVectorSearchTool(BaseTool):
    """Tool to search the Weaviate database.

    Connects to a Weaviate Cloud cluster on every call, runs a near-text
    query against ``collection_name`` and returns the matching objects'
    properties as JSON text.
    """

    name: str = "WeaviateVectorSearchTool"
    description: str = "A tool to search the Weaviate database for relevant information on internal documents."
    args_schema: Type[BaseModel] = WeaviateToolSchema
    query: Optional[str] = None

    # Both settings hold `Configure.*` config objects (not plain strings or
    # enum members), so they are typed as `Any` to let callers pass their own
    # configuration objects without failing field validation.
    vectorizer: Optional[Any] = Field(
        default=Configure.Vectorizer.text2vec_openai(
            model="nomic-embed-text",
        )
    )
    generative_model: Optional[Any] = Field(
        default=Configure.Generative.openai(
            model="gpt-4o",
        ),
    )
    collection_name: Optional[str] = None
    limit: Optional[int] = Field(default=3)
    # Resolved lazily per instance; see _default_openai_headers.
    headers: Optional[dict] = Field(default_factory=_default_openai_headers)
    weaviate_cluster_url: str = Field(
        ...,
        description="The URL of the Weaviate cluster",
    )
    weaviate_api_key: str = Field(
        ...,
        description="The API key for the Weaviate cluster",
    )

    def _run(self, query: str) -> str:
        """Search the Weaviate database.

        Args:
            query (str): The query used to retrieve relevant information from
                the Weaviate database. Pass only the query as a string, not
                the question.

        Returns:
            str: The properties of each matching object, each dumped as
                indented JSON and concatenated in result order.

        Raises:
            ValueError: If the cluster URL, the API key or the collection
                name is not set.
        """
        if not self.weaviate_cluster_url or not self.weaviate_api_key:
            raise ValueError("WEAVIATE_URL or WEAVIATE_API_KEY is not set")
        if not self.collection_name:
            raise ValueError("collection_name is not set")

        client = weaviate.connect_to_weaviate_cloud(
            cluster_url=self.weaviate_cluster_url,
            auth_credentials=Auth.api_key(self.weaviate_api_key),
            headers=self.headers,
        )
        try:
            # `collections.get` only returns a handle and never reports a
            # missing collection, so existence must be checked explicitly
            # before deciding whether to create it.
            if client.collections.exists(self.collection_name):
                internal_docs = client.collections.get(self.collection_name)
            else:
                internal_docs = client.collections.create(
                    name=self.collection_name,
                    vectorizer_config=self.vectorizer,
                    generative_config=self.generative_model,
                )

            response = internal_docs.query.near_text(
                query=query,
                limit=self.limit,
            )
            return "".join(
                json.dumps(obj.properties, indent=2) for obj in response.objects
            )
        finally:
            # Always release the connection, even when the query fails.
            client.close()
|
||||
Reference in New Issue
Block a user