mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 04:18:35 +00:00
Enhance QdrantVectorSearchTool (#3806)
Some checks failed
Some checks failed
This commit is contained in:
@@ -23,13 +23,15 @@ Here's a minimal example of how to use the tool:
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
from crewai import Agent
|
from crewai import Agent
|
||||||
from crewai_tools import QdrantVectorSearchTool
|
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||||
|
|
||||||
# Initialize the tool
|
# Initialize the tool with QdrantConfig
|
||||||
qdrant_tool = QdrantVectorSearchTool(
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
qdrant_url="your_qdrant_url",
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key="your_qdrant_api_key",
|
qdrant_url="your_qdrant_url",
|
||||||
collection_name="your_collection"
|
qdrant_api_key="your_qdrant_api_key",
|
||||||
|
collection_name="your_collection"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create an agent that uses the tool
|
# Create an agent that uses the tool
|
||||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
|||||||
def get_openai_embedding(text):
|
def get_openai_embedding(text):
|
||||||
response = client.embeddings.create(
|
response = client.embeddings.create(
|
||||||
input=text,
|
input=text,
|
||||||
model="text-embedding-3-small"
|
model="text-embedding-3-large"
|
||||||
)
|
)
|
||||||
return response.data[0].embedding
|
return response.data[0].embedding
|
||||||
|
|
||||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
|||||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||||
# Extract text from PDF
|
# Extract text from PDF
|
||||||
text_chunks = extract_text_from_pdf(pdf_path)
|
text_chunks = extract_text_from_pdf(pdf_path)
|
||||||
|
|
||||||
# Create Qdrant collection
|
# Create Qdrant collection
|
||||||
if qdrant.collection_exists(collection_name):
|
if qdrant.collection_exists(collection_name):
|
||||||
qdrant.delete_collection(collection_name)
|
qdrant.delete_collection(collection_name)
|
||||||
qdrant.create_collection(
|
qdrant.create_collection(
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Store embeddings
|
# Store embeddings
|
||||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
|||||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||||
|
|
||||||
# Initialize Qdrant search tool
|
# Initialize Qdrant search tool
|
||||||
|
from crewai_tools import QdrantConfig
|
||||||
|
|
||||||
qdrant_tool = QdrantVectorSearchTool(
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
qdrant_url=os.getenv("QDRANT_URL"),
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
qdrant_url=os.getenv("QDRANT_URL"),
|
||||||
collection_name=collection_name,
|
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||||
limit=3,
|
collection_name=collection_name,
|
||||||
score_threshold=0.35
|
limit=3,
|
||||||
|
score_threshold=0.35
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create CrewAI agents
|
# Create CrewAI agents
|
||||||
search_agent = Agent(
|
search_agent = Agent(
|
||||||
role="Senior Semantic Search Agent",
|
role="Senior Semantic Search Agent",
|
||||||
goal="Find and analyze documents based on semantic search",
|
goal="Find and analyze documents based on semantic search",
|
||||||
backstory="""You are an expert research assistant who can find relevant
|
backstory="""You are an expert research assistant who can find relevant
|
||||||
information using semantic search in a Qdrant database.""",
|
information using semantic search in a Qdrant database.""",
|
||||||
tools=[qdrant_tool],
|
tools=[qdrant_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
|||||||
answer_agent = Agent(
|
answer_agent = Agent(
|
||||||
role="Senior Answer Assistant",
|
role="Senior Answer Assistant",
|
||||||
goal="Generate answers to questions based on the context provided",
|
goal="Generate answers to questions based on the context provided",
|
||||||
backstory="""You are an expert answer assistant who can generate
|
backstory="""You are an expert answer assistant who can generate
|
||||||
answers to questions based on the context provided.""",
|
answers to questions based on the context provided.""",
|
||||||
tools=[qdrant_tool],
|
tools=[qdrant_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
@@ -180,21 +186,82 @@ print(result)
|
|||||||
## Tool Parameters
|
## Tool Parameters
|
||||||
|
|
||||||
### Required Parameters
|
### Required Parameters
|
||||||
- `qdrant_url` (str): The URL of your Qdrant server
|
- `qdrant_config` (QdrantConfig): Configuration object containing all Qdrant settings
|
||||||
- `qdrant_api_key` (str): API key for authentication with Qdrant
|
|
||||||
- `collection_name` (str): Name of the Qdrant collection to search
|
|
||||||
|
|
||||||
### Optional Parameters
|
### QdrantConfig Parameters
|
||||||
|
- `qdrant_url` (str): The URL of your Qdrant server
|
||||||
|
- `qdrant_api_key` (str, optional): API key for authentication with Qdrant
|
||||||
|
- `collection_name` (str): Name of the Qdrant collection to search
|
||||||
- `limit` (int): Maximum number of results to return (default: 3)
|
- `limit` (int): Maximum number of results to return (default: 3)
|
||||||
- `score_threshold` (float): Minimum similarity score threshold (default: 0.35)
|
- `score_threshold` (float): Minimum similarity score threshold (default: 0.35)
|
||||||
|
- `filter` (Any, optional): Qdrant Filter instance for advanced filtering (default: None)
|
||||||
|
|
||||||
|
### Optional Tool Parameters
|
||||||
- `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization
|
- `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization
|
||||||
|
- `qdrant_package` (str): Base package path for Qdrant (default: "qdrant_client")
|
||||||
|
- `client` (Any, optional): Pre-initialized Qdrant client
|
||||||
|
|
||||||
|
## Advanced Filtering
|
||||||
|
|
||||||
|
The QdrantVectorSearchTool supports powerful filtering capabilities to refine your search results:
|
||||||
|
|
||||||
|
### Dynamic Filtering
|
||||||
|
Use the `filter_by` and `filter_value` parameters in your search to filter results on the fly:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Agent will use these parameters when calling the tool
|
||||||
|
# The tool schema accepts filter_by and filter_value
|
||||||
|
# Example: search with category filter
|
||||||
|
# Results will be filtered where category == "technology"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Preset Filters with QdrantConfig
|
||||||
|
For complex filtering, use Qdrant Filter instances in your configuration:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from qdrant_client.http import models as qmodels
|
||||||
|
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||||
|
|
||||||
|
# Create a filter for specific conditions
|
||||||
|
preset_filter = qmodels.Filter(
|
||||||
|
must=[
|
||||||
|
qmodels.FieldCondition(
|
||||||
|
key="category",
|
||||||
|
match=qmodels.MatchValue(value="research")
|
||||||
|
),
|
||||||
|
qmodels.FieldCondition(
|
||||||
|
key="year",
|
||||||
|
match=qmodels.MatchValue(value=2024)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize tool with preset filter
|
||||||
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
|
qdrant_config=QdrantConfig(
|
||||||
|
qdrant_url="your_url",
|
||||||
|
qdrant_api_key="your_key",
|
||||||
|
collection_name="your_collection",
|
||||||
|
filter=preset_filter # Preset filter applied to all searches
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Combining Filters
|
||||||
|
The tool automatically combines preset filters from `QdrantConfig` with dynamic filters from `filter_by` and `filter_value`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# If QdrantConfig has a preset filter for category="research"
|
||||||
|
# And the search uses filter_by="year", filter_value=2024
|
||||||
|
# Both filters will be combined (AND logic)
|
||||||
|
```
|
||||||
|
|
||||||
## Search Parameters
|
## Search Parameters
|
||||||
|
|
||||||
The tool accepts these parameters in its schema:
|
The tool accepts these parameters in its schema:
|
||||||
- `query` (str): The search query to find similar documents
|
- `query` (str): The search query to find similar documents
|
||||||
- `filter_by` (str, optional): Metadata field to filter on
|
- `filter_by` (str, optional): Metadata field to filter on
|
||||||
- `filter_value` (str, optional): Value to filter by
|
- `filter_value` (Any, optional): Value to filter by
|
||||||
|
|
||||||
## Return Format
|
## Return Format
|
||||||
|
|
||||||
@@ -214,7 +281,7 @@ The tool returns results in JSON format:
|
|||||||
|
|
||||||
## Default Embedding
|
## Default Embedding
|
||||||
|
|
||||||
By default, the tool uses OpenAI's `text-embedding-3-small` model for vectorization. This requires:
|
By default, the tool uses OpenAI's `text-embedding-3-large` model for vectorization. This requires:
|
||||||
- OpenAI API key set in environment: `OPENAI_API_KEY`
|
- OpenAI API key set in environment: `OPENAI_API_KEY`
|
||||||
|
|
||||||
## Custom Embeddings
|
## Custom Embeddings
|
||||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
|||||||
# Tokenize and get model outputs
|
# Tokenize and get model outputs
|
||||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||||
outputs = model(**inputs)
|
outputs = model(**inputs)
|
||||||
|
|
||||||
# Use mean pooling to get text embedding
|
# Use mean pooling to get text embedding
|
||||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||||
|
|
||||||
# Convert to list of floats and return
|
# Convert to list of floats and return
|
||||||
return embeddings[0].tolist()
|
return embeddings[0].tolist()
|
||||||
|
|
||||||
# Use custom embeddings with the tool
|
# Use custom embeddings with the tool
|
||||||
|
from crewai_tools import QdrantConfig
|
||||||
|
|
||||||
tool = QdrantVectorSearchTool(
|
tool = QdrantVectorSearchTool(
|
||||||
qdrant_url="your_url",
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key="your_key",
|
qdrant_url="your_url",
|
||||||
collection_name="your_collection",
|
qdrant_api_key="your_key",
|
||||||
|
collection_name="your_collection"
|
||||||
|
),
|
||||||
custom_embedding_fn=custom_embeddings # Pass your custom function
|
custom_embedding_fn=custom_embeddings # Pass your custom function
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -269,4 +340,4 @@ Required environment variables:
|
|||||||
```bash
|
```bash
|
||||||
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
||||||
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
||||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||||
|
|||||||
@@ -23,13 +23,15 @@ uv add qdrant-client
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
from crewai import Agent
|
from crewai import Agent
|
||||||
from crewai_tools import QdrantVectorSearchTool
|
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||||
|
|
||||||
# Initialize the tool
|
# QdrantConfig로 도구 초기화
|
||||||
qdrant_tool = QdrantVectorSearchTool(
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
qdrant_url="your_qdrant_url",
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key="your_qdrant_api_key",
|
qdrant_url="your_qdrant_url",
|
||||||
collection_name="your_collection"
|
qdrant_api_key="your_qdrant_api_key",
|
||||||
|
collection_name="your_collection"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create an agent that uses the tool
|
# Create an agent that uses the tool
|
||||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
|||||||
def get_openai_embedding(text):
|
def get_openai_embedding(text):
|
||||||
response = client.embeddings.create(
|
response = client.embeddings.create(
|
||||||
input=text,
|
input=text,
|
||||||
model="text-embedding-3-small"
|
model="text-embedding-3-large"
|
||||||
)
|
)
|
||||||
return response.data[0].embedding
|
return response.data[0].embedding
|
||||||
|
|
||||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
|||||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||||
# Extract text from PDF
|
# Extract text from PDF
|
||||||
text_chunks = extract_text_from_pdf(pdf_path)
|
text_chunks = extract_text_from_pdf(pdf_path)
|
||||||
|
|
||||||
# Create Qdrant collection
|
# Create Qdrant collection
|
||||||
if qdrant.collection_exists(collection_name):
|
if qdrant.collection_exists(collection_name):
|
||||||
qdrant.delete_collection(collection_name)
|
qdrant.delete_collection(collection_name)
|
||||||
qdrant.create_collection(
|
qdrant.create_collection(
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Store embeddings
|
# Store embeddings
|
||||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
|||||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||||
|
|
||||||
# Initialize Qdrant search tool
|
# Initialize Qdrant search tool
|
||||||
|
from crewai_tools import QdrantConfig
|
||||||
|
|
||||||
qdrant_tool = QdrantVectorSearchTool(
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
qdrant_url=os.getenv("QDRANT_URL"),
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
qdrant_url=os.getenv("QDRANT_URL"),
|
||||||
collection_name=collection_name,
|
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||||
limit=3,
|
collection_name=collection_name,
|
||||||
score_threshold=0.35
|
limit=3,
|
||||||
|
score_threshold=0.35
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create CrewAI agents
|
# Create CrewAI agents
|
||||||
search_agent = Agent(
|
search_agent = Agent(
|
||||||
role="Senior Semantic Search Agent",
|
role="Senior Semantic Search Agent",
|
||||||
goal="Find and analyze documents based on semantic search",
|
goal="Find and analyze documents based on semantic search",
|
||||||
backstory="""You are an expert research assistant who can find relevant
|
backstory="""You are an expert research assistant who can find relevant
|
||||||
information using semantic search in a Qdrant database.""",
|
information using semantic search in a Qdrant database.""",
|
||||||
tools=[qdrant_tool],
|
tools=[qdrant_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
|||||||
answer_agent = Agent(
|
answer_agent = Agent(
|
||||||
role="Senior Answer Assistant",
|
role="Senior Answer Assistant",
|
||||||
goal="Generate answers to questions based on the context provided",
|
goal="Generate answers to questions based on the context provided",
|
||||||
backstory="""You are an expert answer assistant who can generate
|
backstory="""You are an expert answer assistant who can generate
|
||||||
answers to questions based on the context provided.""",
|
answers to questions based on the context provided.""",
|
||||||
tools=[qdrant_tool],
|
tools=[qdrant_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
@@ -180,21 +186,82 @@ print(result)
|
|||||||
## 도구 매개변수
|
## 도구 매개변수
|
||||||
|
|
||||||
### 필수 파라미터
|
### 필수 파라미터
|
||||||
- `qdrant_url` (str): Qdrant 서버의 URL
|
- `qdrant_config` (QdrantConfig): 모든 Qdrant 설정을 포함하는 구성 객체
|
||||||
- `qdrant_api_key` (str): Qdrant 인증을 위한 API 키
|
|
||||||
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
|
|
||||||
|
|
||||||
### 선택적 매개변수
|
### QdrantConfig 매개변수
|
||||||
|
- `qdrant_url` (str): Qdrant 서버의 URL
|
||||||
|
- `qdrant_api_key` (str, 선택 사항): Qdrant 인증을 위한 API 키
|
||||||
|
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
|
||||||
- `limit` (int): 반환할 최대 결과 수 (기본값: 3)
|
- `limit` (int): 반환할 최대 결과 수 (기본값: 3)
|
||||||
- `score_threshold` (float): 최소 유사도 점수 임계값 (기본값: 0.35)
|
- `score_threshold` (float): 최소 유사도 점수 임계값 (기본값: 0.35)
|
||||||
|
- `filter` (Any, 선택 사항): 고급 필터링을 위한 Qdrant Filter 인스턴스 (기본값: None)
|
||||||
|
|
||||||
|
### 선택적 도구 매개변수
|
||||||
- `custom_embedding_fn` (Callable[[str], list[float]]): 텍스트 벡터화를 위한 사용자 지정 함수
|
- `custom_embedding_fn` (Callable[[str], list[float]]): 텍스트 벡터화를 위한 사용자 지정 함수
|
||||||
|
- `qdrant_package` (str): Qdrant의 기본 패키지 경로 (기본값: "qdrant_client")
|
||||||
|
- `client` (Any, 선택 사항): 사전 초기화된 Qdrant 클라이언트
|
||||||
|
|
||||||
|
## 고급 필터링
|
||||||
|
|
||||||
|
QdrantVectorSearchTool은 검색 결과를 세밀하게 조정할 수 있는 강력한 필터링 기능을 지원합니다:
|
||||||
|
|
||||||
|
### 동적 필터링
|
||||||
|
검색 시 `filter_by` 및 `filter_value` 매개변수를 사용하여 즉석에서 결과를 필터링할 수 있습니다:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 에이전트는 도구를 호출할 때 이러한 매개변수를 사용합니다
|
||||||
|
# 도구 스키마는 filter_by 및 filter_value를 허용합니다
|
||||||
|
# 예시: 카테고리 필터를 사용한 검색
|
||||||
|
# 결과는 category == "기술"인 항목으로 필터링됩니다
|
||||||
|
```
|
||||||
|
|
||||||
|
### QdrantConfig를 사용한 사전 설정 필터
|
||||||
|
복잡한 필터링의 경우 구성에서 Qdrant Filter 인스턴스를 사용하세요:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from qdrant_client.http import models as qmodels
|
||||||
|
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||||
|
|
||||||
|
# 특정 조건에 대한 필터 생성
|
||||||
|
preset_filter = qmodels.Filter(
|
||||||
|
must=[
|
||||||
|
qmodels.FieldCondition(
|
||||||
|
key="category",
|
||||||
|
match=qmodels.MatchValue(value="research")
|
||||||
|
),
|
||||||
|
qmodels.FieldCondition(
|
||||||
|
key="year",
|
||||||
|
match=qmodels.MatchValue(value=2024)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# 사전 설정 필터로 도구 초기화
|
||||||
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
|
qdrant_config=QdrantConfig(
|
||||||
|
qdrant_url="your_url",
|
||||||
|
qdrant_api_key="your_key",
|
||||||
|
collection_name="your_collection",
|
||||||
|
filter=preset_filter # 모든 검색에 적용되는 사전 설정 필터
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 필터 결합
|
||||||
|
도구는 `QdrantConfig`의 사전 설정 필터와 `filter_by` 및 `filter_value`의 동적 필터를 자동으로 결합합니다:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# QdrantConfig에 category="research"에 대한 사전 설정 필터가 있고
|
||||||
|
# 검색에서 filter_by="year", filter_value=2024를 사용하는 경우
|
||||||
|
# 두 필터가 모두 결합됩니다 (AND 논리)
|
||||||
|
```
|
||||||
|
|
||||||
## 검색 매개변수
|
## 검색 매개변수
|
||||||
|
|
||||||
이 도구는 스키마에서 다음과 같은 매개변수를 허용합니다:
|
이 도구는 스키마에서 다음과 같은 매개변수를 허용합니다:
|
||||||
- `query` (str): 유사한 문서를 찾기 위한 검색 쿼리
|
- `query` (str): 유사한 문서를 찾기 위한 검색 쿼리
|
||||||
- `filter_by` (str, 선택 사항): 필터링할 메타데이터 필드
|
- `filter_by` (str, 선택 사항): 필터링할 메타데이터 필드
|
||||||
- `filter_value` (str, 선택 사항): 필터 기준 값
|
- `filter_value` (Any, 선택 사항): 필터 기준 값
|
||||||
|
|
||||||
## 반환 형식
|
## 반환 형식
|
||||||
|
|
||||||
@@ -214,7 +281,7 @@ print(result)
|
|||||||
|
|
||||||
## 기본 임베딩
|
## 기본 임베딩
|
||||||
|
|
||||||
기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-small` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다:
|
기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-large` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다:
|
||||||
- 환경변수에 설정된 OpenAI API 키: `OPENAI_API_KEY`
|
- 환경변수에 설정된 OpenAI API 키: `OPENAI_API_KEY`
|
||||||
|
|
||||||
## 커스텀 임베딩
|
## 커스텀 임베딩
|
||||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
|||||||
# Tokenize and get model outputs
|
# Tokenize and get model outputs
|
||||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||||
outputs = model(**inputs)
|
outputs = model(**inputs)
|
||||||
|
|
||||||
# Use mean pooling to get text embedding
|
# Use mean pooling to get text embedding
|
||||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||||
|
|
||||||
# Convert to list of floats and return
|
# Convert to list of floats and return
|
||||||
return embeddings[0].tolist()
|
return embeddings[0].tolist()
|
||||||
|
|
||||||
# Use custom embeddings with the tool
|
# Use custom embeddings with the tool
|
||||||
|
from crewai_tools import QdrantConfig
|
||||||
|
|
||||||
tool = QdrantVectorSearchTool(
|
tool = QdrantVectorSearchTool(
|
||||||
qdrant_url="your_url",
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key="your_key",
|
qdrant_url="your_url",
|
||||||
collection_name="your_collection",
|
qdrant_api_key="your_key",
|
||||||
|
collection_name="your_collection"
|
||||||
|
),
|
||||||
custom_embedding_fn=custom_embeddings # Pass your custom function
|
custom_embedding_fn=custom_embeddings # Pass your custom function
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -270,4 +341,4 @@ tool = QdrantVectorSearchTool(
|
|||||||
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
export QDRANT_URL="your_qdrant_url" # If not provided in constructor
|
||||||
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
export QDRANT_API_KEY="your_api_key" # If not provided in constructor
|
||||||
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
export OPENAI_API_KEY="your_openai_key" # If using default embeddings
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -23,13 +23,15 @@ Veja um exemplo mínimo de como utilizar a ferramenta:
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
from crewai import Agent
|
from crewai import Agent
|
||||||
from crewai_tools import QdrantVectorSearchTool
|
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||||
|
|
||||||
# Inicialize a ferramenta
|
# Inicialize a ferramenta com QdrantConfig
|
||||||
qdrant_tool = QdrantVectorSearchTool(
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
qdrant_url="your_qdrant_url",
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key="your_qdrant_api_key",
|
qdrant_url="your_qdrant_url",
|
||||||
collection_name="your_collection"
|
qdrant_api_key="your_qdrant_api_key",
|
||||||
|
collection_name="your_collection"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Crie um agente que utiliza a ferramenta
|
# Crie um agente que utiliza a ferramenta
|
||||||
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
|
|||||||
def get_openai_embedding(text):
|
def get_openai_embedding(text):
|
||||||
response = client.embeddings.create(
|
response = client.embeddings.create(
|
||||||
input=text,
|
input=text,
|
||||||
model="text-embedding-3-small"
|
model="text-embedding-3-large"
|
||||||
)
|
)
|
||||||
return response.data[0].embedding
|
return response.data[0].embedding
|
||||||
|
|
||||||
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
|
|||||||
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
|
||||||
# Extrair texto do PDF
|
# Extrair texto do PDF
|
||||||
text_chunks = extract_text_from_pdf(pdf_path)
|
text_chunks = extract_text_from_pdf(pdf_path)
|
||||||
|
|
||||||
# Criar coleção no Qdrant
|
# Criar coleção no Qdrant
|
||||||
if qdrant.collection_exists(collection_name):
|
if qdrant.collection_exists(collection_name):
|
||||||
qdrant.delete_collection(collection_name)
|
qdrant.delete_collection(collection_name)
|
||||||
qdrant.create_collection(
|
qdrant.create_collection(
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
|
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Armazenar embeddings
|
# Armazenar embeddings
|
||||||
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
|
|||||||
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
|
||||||
|
|
||||||
# Inicializar ferramenta de busca Qdrant
|
# Inicializar ferramenta de busca Qdrant
|
||||||
|
from crewai_tools import QdrantConfig
|
||||||
|
|
||||||
qdrant_tool = QdrantVectorSearchTool(
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
qdrant_url=os.getenv("QDRANT_URL"),
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
qdrant_url=os.getenv("QDRANT_URL"),
|
||||||
collection_name=collection_name,
|
qdrant_api_key=os.getenv("QDRANT_API_KEY"),
|
||||||
limit=3,
|
collection_name=collection_name,
|
||||||
score_threshold=0.35
|
limit=3,
|
||||||
|
score_threshold=0.35
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Criar agentes CrewAI
|
# Criar agentes CrewAI
|
||||||
search_agent = Agent(
|
search_agent = Agent(
|
||||||
role="Senior Semantic Search Agent",
|
role="Senior Semantic Search Agent",
|
||||||
goal="Find and analyze documents based on semantic search",
|
goal="Find and analyze documents based on semantic search",
|
||||||
backstory="""You are an expert research assistant who can find relevant
|
backstory="""You are an expert research assistant who can find relevant
|
||||||
information using semantic search in a Qdrant database.""",
|
information using semantic search in a Qdrant database.""",
|
||||||
tools=[qdrant_tool],
|
tools=[qdrant_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
@@ -141,7 +147,7 @@ search_agent = Agent(
|
|||||||
answer_agent = Agent(
|
answer_agent = Agent(
|
||||||
role="Senior Answer Assistant",
|
role="Senior Answer Assistant",
|
||||||
goal="Generate answers to questions based on the context provided",
|
goal="Generate answers to questions based on the context provided",
|
||||||
backstory="""You are an expert answer assistant who can generate
|
backstory="""You are an expert answer assistant who can generate
|
||||||
answers to questions based on the context provided.""",
|
answers to questions based on the context provided.""",
|
||||||
tools=[qdrant_tool],
|
tools=[qdrant_tool],
|
||||||
verbose=True
|
verbose=True
|
||||||
@@ -180,21 +186,82 @@ print(result)
|
|||||||
## Parâmetros da Ferramenta
|
## Parâmetros da Ferramenta
|
||||||
|
|
||||||
### Parâmetros Obrigatórios
|
### Parâmetros Obrigatórios
|
||||||
- `qdrant_url` (str): URL do seu servidor Qdrant
|
- `qdrant_config` (QdrantConfig): Objeto de configuração contendo todas as configurações do Qdrant
|
||||||
- `qdrant_api_key` (str): Chave de API para autenticação com o Qdrant
|
|
||||||
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
|
|
||||||
|
|
||||||
### Parâmetros Opcionais
|
### Parâmetros do QdrantConfig
|
||||||
|
- `qdrant_url` (str): URL do seu servidor Qdrant
|
||||||
|
- `qdrant_api_key` (str, opcional): Chave de API para autenticação com o Qdrant
|
||||||
|
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
|
||||||
- `limit` (int): Número máximo de resultados a serem retornados (padrão: 3)
|
- `limit` (int): Número máximo de resultados a serem retornados (padrão: 3)
|
||||||
- `score_threshold` (float): Limite mínimo de similaridade (padrão: 0.35)
|
- `score_threshold` (float): Limite mínimo de similaridade (padrão: 0.35)
|
||||||
|
- `filter` (Any, opcional): Instância de Filter do Qdrant para filtragem avançada (padrão: None)
|
||||||
|
|
||||||
|
### Parâmetros Opcionais da Ferramenta
|
||||||
- `custom_embedding_fn` (Callable[[str], list[float]]): Função personalizada para vetorização de textos
|
- `custom_embedding_fn` (Callable[[str], list[float]]): Função personalizada para vetorização de textos
|
||||||
|
- `qdrant_package` (str): Caminho base do pacote Qdrant (padrão: "qdrant_client")
|
||||||
|
- `client` (Any, opcional): Cliente Qdrant pré-inicializado
|
||||||
|
|
||||||
|
## Filtragem Avançada
|
||||||
|
|
||||||
|
A ferramenta QdrantVectorSearchTool oferece recursos poderosos de filtragem para refinar os resultados da busca:
|
||||||
|
|
||||||
|
### Filtragem Dinâmica
|
||||||
|
Use os parâmetros `filter_by` e `filter_value` na sua busca para filtrar resultados dinamicamente:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# O agente usará esses parâmetros ao chamar a ferramenta
|
||||||
|
# O schema da ferramenta aceita filter_by e filter_value
|
||||||
|
# Exemplo: busca com filtro de categoria
|
||||||
|
# Os resultados serão filtrados onde categoria == "tecnologia"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Filtros Pré-definidos com QdrantConfig
|
||||||
|
Para filtragens complexas, use instâncias de Filter do Qdrant na sua configuração:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from qdrant_client.http import models as qmodels
|
||||||
|
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
|
||||||
|
|
||||||
|
# Criar um filtro para condições específicas
|
||||||
|
preset_filter = qmodels.Filter(
|
||||||
|
must=[
|
||||||
|
qmodels.FieldCondition(
|
||||||
|
key="categoria",
|
||||||
|
match=qmodels.MatchValue(value="pesquisa")
|
||||||
|
),
|
||||||
|
qmodels.FieldCondition(
|
||||||
|
key="ano",
|
||||||
|
match=qmodels.MatchValue(value=2024)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Inicializar ferramenta com filtro pré-definido
|
||||||
|
qdrant_tool = QdrantVectorSearchTool(
|
||||||
|
qdrant_config=QdrantConfig(
|
||||||
|
qdrant_url="your_url",
|
||||||
|
qdrant_api_key="your_key",
|
||||||
|
collection_name="your_collection",
|
||||||
|
filter=preset_filter # Filtro pré-definido aplicado a todas as buscas
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Combinando Filtros
|
||||||
|
A ferramenta combina automaticamente os filtros pré-definidos do `QdrantConfig` com os filtros dinâmicos de `filter_by` e `filter_value`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Se QdrantConfig tem um filtro pré-definido para categoria="pesquisa"
|
||||||
|
# E a busca usa filter_by="ano", filter_value=2024
|
||||||
|
# Ambos os filtros serão combinados (lógica AND)
|
||||||
|
```
|
||||||
|
|
||||||
## Parâmetros de Busca
|
## Parâmetros de Busca
|
||||||
|
|
||||||
A ferramenta aceita estes parâmetros em seu schema:
|
A ferramenta aceita estes parâmetros em seu schema:
|
||||||
- `query` (str): Consulta de busca para encontrar documentos similares
|
- `query` (str): Consulta de busca para encontrar documentos similares
|
||||||
- `filter_by` (str, opcional): Campo de metadado para filtrar
|
- `filter_by` (str, opcional): Campo de metadado para filtrar
|
||||||
- `filter_value` (str, opcional): Valor para filtrar
|
- `filter_value` (Any, opcional): Valor para filtrar
|
||||||
|
|
||||||
## Formato de Retorno
|
## Formato de Retorno
|
||||||
|
|
||||||
@@ -214,7 +281,7 @@ A ferramenta retorna resultados no formato JSON:
|
|||||||
|
|
||||||
## Embedding Padrão
|
## Embedding Padrão
|
||||||
|
|
||||||
Por padrão, a ferramenta utiliza o modelo `text-embedding-3-small` da OpenAI para vetorização. Isso requer:
|
Por padrão, a ferramenta utiliza o modelo `text-embedding-3-large` da OpenAI para vetorização. Isso requer:
|
||||||
- Chave de API da OpenAI definida na variável de ambiente: `OPENAI_API_KEY`
|
- Chave de API da OpenAI definida na variável de ambiente: `OPENAI_API_KEY`
|
||||||
|
|
||||||
## Embeddings Personalizados
|
## Embeddings Personalizados
|
||||||
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
|
|||||||
# Tokenizar e obter saídas do modelo
|
# Tokenizar e obter saídas do modelo
|
||||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
||||||
outputs = model(**inputs)
|
outputs = model(**inputs)
|
||||||
|
|
||||||
# Usar mean pooling para obter o embedding do texto
|
# Usar mean pooling para obter o embedding do texto
|
||||||
embeddings = outputs.last_hidden_state.mean(dim=1)
|
embeddings = outputs.last_hidden_state.mean(dim=1)
|
||||||
|
|
||||||
# Converter para lista de floats e retornar
|
# Converter para lista de floats e retornar
|
||||||
return embeddings[0].tolist()
|
return embeddings[0].tolist()
|
||||||
|
|
||||||
# Usar embeddings personalizados com a ferramenta
|
# Usar embeddings personalizados com a ferramenta
|
||||||
|
from crewai_tools import QdrantConfig
|
||||||
|
|
||||||
tool = QdrantVectorSearchTool(
|
tool = QdrantVectorSearchTool(
|
||||||
qdrant_url="your_url",
|
qdrant_config=QdrantConfig(
|
||||||
qdrant_api_key="your_key",
|
qdrant_url="your_url",
|
||||||
collection_name="your_collection",
|
qdrant_api_key="your_key",
|
||||||
|
collection_name="your_collection"
|
||||||
|
),
|
||||||
custom_embedding_fn=custom_embeddings # Passe sua função personalizada
|
custom_embedding_fn=custom_embeddings # Passe sua função personalizada
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
@@ -270,4 +341,4 @@ Variáveis de ambiente obrigatórias:
|
|||||||
export QDRANT_URL="your_qdrant_url" # Se não for informado no construtor
|
export QDRANT_URL="your_qdrant_url" # Se não for informado no construtor
|
||||||
export QDRANT_API_KEY="your_api_key" # Se não for informado no construtor
|
export QDRANT_API_KEY="your_api_key" # Se não for informado no construtor
|
||||||
export OPENAI_API_KEY="your_openai_key" # Se estiver usando embeddings padrão
|
export OPENAI_API_KEY="your_openai_key" # Se estiver usando embeddings padrão
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
import importlib
|
import importlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from collections.abc import Callable
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from crewai.tools import BaseTool, EnvVar
|
from crewai.tools import BaseTool, EnvVar
|
||||||
@@ -12,9 +12,13 @@ from pydantic.types import ImportString
|
|||||||
|
|
||||||
|
|
||||||
class QdrantToolSchema(BaseModel):
|
class QdrantToolSchema(BaseModel):
|
||||||
query: str = Field(..., description="Query to search in Qdrant DB.")
|
query: str = Field(..., description="Query to search in Qdrant DB")
|
||||||
filter_by: str | None = None
|
filter_by: str | None = Field(
|
||||||
filter_value: str | None = None
|
default=None, description="Parameter to filter the search by."
|
||||||
|
)
|
||||||
|
filter_value: Any | None = Field(
|
||||||
|
default=None, description="Value to filter the search by."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class QdrantConfig(BaseModel):
|
class QdrantConfig(BaseModel):
|
||||||
@@ -25,7 +29,9 @@ class QdrantConfig(BaseModel):
|
|||||||
collection_name: str
|
collection_name: str
|
||||||
limit: int = 3
|
limit: int = 3
|
||||||
score_threshold: float = 0.35
|
score_threshold: float = 0.35
|
||||||
filter_conditions: list[tuple[str, Any]] = Field(default_factory=list)
|
filter: Any | None = Field(
|
||||||
|
default=None, description="Qdrant Filter instance for advanced filtering."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class QdrantVectorSearchTool(BaseTool):
|
class QdrantVectorSearchTool(BaseTool):
|
||||||
@@ -76,23 +82,26 @@ class QdrantVectorSearchTool(BaseTool):
|
|||||||
filter_value: Any | None = None,
|
filter_value: Any | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Perform vector similarity search."""
|
"""Perform vector similarity search."""
|
||||||
filter_ = self.qdrant_package.http.models.Filter
|
|
||||||
field_condition = self.qdrant_package.http.models.FieldCondition
|
|
||||||
match_value = self.qdrant_package.http.models.MatchValue
|
|
||||||
conditions = self.qdrant_config.filter_conditions.copy()
|
|
||||||
if filter_by and filter_value is not None:
|
|
||||||
conditions.append((filter_by, filter_value))
|
|
||||||
|
|
||||||
search_filter = (
|
search_filter = (
|
||||||
filter_(
|
self.qdrant_config.filter.model_copy()
|
||||||
must=[
|
if self.qdrant_config.filter is not None
|
||||||
field_condition(key=k, match=match_value(value=v))
|
else self.qdrant_package.http.models.Filter(must=[])
|
||||||
for k, v in conditions
|
|
||||||
]
|
|
||||||
)
|
|
||||||
if conditions
|
|
||||||
else None
|
|
||||||
)
|
)
|
||||||
|
if filter_by and filter_value is not None:
|
||||||
|
if not hasattr(search_filter, "must") or not isinstance(
|
||||||
|
search_filter.must, list
|
||||||
|
):
|
||||||
|
search_filter.must = []
|
||||||
|
search_filter.must.append(
|
||||||
|
self.qdrant_package.http.models.FieldCondition(
|
||||||
|
key=filter_by,
|
||||||
|
match=self.qdrant_package.http.models.MatchValue(
|
||||||
|
value=filter_value
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
query_vector = (
|
query_vector = (
|
||||||
self.custom_embedding_fn(query)
|
self.custom_embedding_fn(query)
|
||||||
if self.custom_embedding_fn
|
if self.custom_embedding_fn
|
||||||
|
|||||||
Reference in New Issue
Block a user