Enhance QdrantVectorSearchTool (#3806)
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled

This commit is contained in:
Daniel Barreto
2025-10-28 14:42:40 -03:00
committed by GitHub
parent 410db1ff39
commit 70b083945f
4 changed files with 322 additions and 100 deletions

View File

@@ -23,13 +23,15 @@ Here's a minimal example of how to use the tool:
```python ```python
from crewai import Agent from crewai import Agent
from crewai_tools import QdrantVectorSearchTool from crewai_tools import QdrantVectorSearchTool, QdrantConfig
# Initialize the tool # Initialize the tool with QdrantConfig
qdrant_tool = QdrantVectorSearchTool( qdrant_tool = QdrantVectorSearchTool(
qdrant_url="your_qdrant_url", qdrant_config=QdrantConfig(
qdrant_api_key="your_qdrant_api_key", qdrant_url="your_qdrant_url",
collection_name="your_collection" qdrant_api_key="your_qdrant_api_key",
collection_name="your_collection"
)
) )
# Create an agent that uses the tool # Create an agent that uses the tool
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
def get_openai_embedding(text): def get_openai_embedding(text):
response = client.embeddings.create( response = client.embeddings.create(
input=text, input=text,
model="text-embedding-3-small" model="text-embedding-3-large"
) )
return response.data[0].embedding return response.data[0].embedding
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name): def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
# Extract text from PDF # Extract text from PDF
text_chunks = extract_text_from_pdf(pdf_path) text_chunks = extract_text_from_pdf(pdf_path)
# Create Qdrant collection # Create Qdrant collection
if qdrant.collection_exists(collection_name): if qdrant.collection_exists(collection_name):
qdrant.delete_collection(collection_name) qdrant.delete_collection(collection_name)
qdrant.create_collection( qdrant.create_collection(
collection_name=collection_name, collection_name=collection_name,
vectors_config=VectorParams(size=1536, distance=Distance.COSINE) vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
) )
# Store embeddings # Store embeddings
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
load_pdf_to_qdrant(pdf_path, qdrant, collection_name) load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
# Initialize Qdrant search tool # Initialize Qdrant search tool
from crewai_tools import QdrantConfig
qdrant_tool = QdrantVectorSearchTool( qdrant_tool = QdrantVectorSearchTool(
qdrant_url=os.getenv("QDRANT_URL"), qdrant_config=QdrantConfig(
qdrant_api_key=os.getenv("QDRANT_API_KEY"), qdrant_url=os.getenv("QDRANT_URL"),
collection_name=collection_name, qdrant_api_key=os.getenv("QDRANT_API_KEY"),
limit=3, collection_name=collection_name,
score_threshold=0.35 limit=3,
score_threshold=0.35
)
) )
# Create CrewAI agents # Create CrewAI agents
search_agent = Agent( search_agent = Agent(
role="Senior Semantic Search Agent", role="Senior Semantic Search Agent",
goal="Find and analyze documents based on semantic search", goal="Find and analyze documents based on semantic search",
backstory="""You are an expert research assistant who can find relevant backstory="""You are an expert research assistant who can find relevant
information using semantic search in a Qdrant database.""", information using semantic search in a Qdrant database.""",
tools=[qdrant_tool], tools=[qdrant_tool],
verbose=True verbose=True
@@ -141,7 +147,7 @@ search_agent = Agent(
answer_agent = Agent( answer_agent = Agent(
role="Senior Answer Assistant", role="Senior Answer Assistant",
goal="Generate answers to questions based on the context provided", goal="Generate answers to questions based on the context provided",
backstory="""You are an expert answer assistant who can generate backstory="""You are an expert answer assistant who can generate
answers to questions based on the context provided.""", answers to questions based on the context provided.""",
tools=[qdrant_tool], tools=[qdrant_tool],
verbose=True verbose=True
@@ -180,21 +186,82 @@ print(result)
## Tool Parameters ## Tool Parameters
### Required Parameters ### Required Parameters
- `qdrant_url` (str): The URL of your Qdrant server - `qdrant_config` (QdrantConfig): Configuration object containing all Qdrant settings
- `qdrant_api_key` (str): API key for authentication with Qdrant
- `collection_name` (str): Name of the Qdrant collection to search
### Optional Parameters ### QdrantConfig Parameters
- `qdrant_url` (str): The URL of your Qdrant server
- `qdrant_api_key` (str, optional): API key for authentication with Qdrant
- `collection_name` (str): Name of the Qdrant collection to search
- `limit` (int): Maximum number of results to return (default: 3) - `limit` (int): Maximum number of results to return (default: 3)
- `score_threshold` (float): Minimum similarity score threshold (default: 0.35) - `score_threshold` (float): Minimum similarity score threshold (default: 0.35)
- `filter` (Any, optional): Qdrant Filter instance for advanced filtering (default: None)
### Optional Tool Parameters
- `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization - `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization
- `qdrant_package` (str): Base package path for Qdrant (default: "qdrant_client")
- `client` (Any): Pre-initialized Qdrant client (optional)
## Advanced Filtering
The QdrantVectorSearchTool supports powerful filtering capabilities to refine your search results:
### Dynamic Filtering
Pass the `filter_by` and `filter_value` parameters with a search to filter results on the fly; the filter is applied only when both are provided:
```python
# Agent will use these parameters when calling the tool
# The tool schema accepts filter_by and filter_value
# Example: search with category filter
# Results will be filtered where category == "technology"
```
### Preset Filters with QdrantConfig
For complex filtering, use Qdrant Filter instances in your configuration:
```python
from qdrant_client.http import models as qmodels
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
# Create a filter for specific conditions
preset_filter = qmodels.Filter(
must=[
qmodels.FieldCondition(
key="category",
match=qmodels.MatchValue(value="research")
),
qmodels.FieldCondition(
key="year",
match=qmodels.MatchValue(value=2024)
)
]
)
# Initialize tool with preset filter
qdrant_tool = QdrantVectorSearchTool(
qdrant_config=QdrantConfig(
qdrant_url="your_url",
qdrant_api_key="your_key",
collection_name="your_collection",
filter=preset_filter # Preset filter applied to all searches
)
)
```
### Combining Filters
The tool automatically combines the preset filter from `QdrantConfig` with the dynamic condition built from `filter_by` and `filter_value`, adding that condition as an extra `must` clause:
```python
# If QdrantConfig has a preset filter for category="research"
# And the search uses filter_by="year", filter_value=2024
# Both filters will be combined (AND logic)
```
## Search Parameters ## Search Parameters
The tool accepts these parameters in its schema: The tool accepts these parameters in its schema:
- `query` (str): The search query to find similar documents - `query` (str): The search query to find similar documents
- `filter_by` (str, optional): Metadata field to filter on - `filter_by` (str, optional): Metadata field to filter on
- `filter_value` (str, optional): Value to filter by - `filter_value` (Any, optional): Value to filter by
## Return Format ## Return Format
@@ -214,7 +281,7 @@ The tool returns results in JSON format:
## Default Embedding ## Default Embedding
By default, the tool uses OpenAI's `text-embedding-3-small` model for vectorization. This requires: By default, the tool uses OpenAI's `text-embedding-3-large` model for vectorization. This requires:
- OpenAI API key set in environment: `OPENAI_API_KEY` - OpenAI API key set in environment: `OPENAI_API_KEY`
## Custom Embeddings ## Custom Embeddings
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
# Tokenize and get model outputs # Tokenize and get model outputs
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True) inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
outputs = model(**inputs) outputs = model(**inputs)
# Use mean pooling to get text embedding # Use mean pooling to get text embedding
embeddings = outputs.last_hidden_state.mean(dim=1) embeddings = outputs.last_hidden_state.mean(dim=1)
# Convert to list of floats and return # Convert to list of floats and return
return embeddings[0].tolist() return embeddings[0].tolist()
# Use custom embeddings with the tool # Use custom embeddings with the tool
from crewai_tools import QdrantConfig
tool = QdrantVectorSearchTool( tool = QdrantVectorSearchTool(
qdrant_url="your_url", qdrant_config=QdrantConfig(
qdrant_api_key="your_key", qdrant_url="your_url",
collection_name="your_collection", qdrant_api_key="your_key",
collection_name="your_collection"
),
custom_embedding_fn=custom_embeddings # Pass your custom function custom_embedding_fn=custom_embeddings # Pass your custom function
) )
``` ```
@@ -269,4 +340,4 @@ Required environment variables:
```bash ```bash
export QDRANT_URL="your_qdrant_url" # If not provided in constructor export QDRANT_URL="your_qdrant_url" # If not provided in constructor
export QDRANT_API_KEY="your_api_key" # If not provided in constructor export QDRANT_API_KEY="your_api_key" # If not provided in constructor
export OPENAI_API_KEY="your_openai_key" # If using default embeddings export OPENAI_API_KEY="your_openai_key" # If using default embeddings

View File

@@ -23,13 +23,15 @@ uv add qdrant-client
```python ```python
from crewai import Agent from crewai import Agent
from crewai_tools import QdrantVectorSearchTool from crewai_tools import QdrantVectorSearchTool, QdrantConfig
# Initialize the tool # QdrantConfig로 도구 초기화
qdrant_tool = QdrantVectorSearchTool( qdrant_tool = QdrantVectorSearchTool(
qdrant_url="your_qdrant_url", qdrant_config=QdrantConfig(
qdrant_api_key="your_qdrant_api_key", qdrant_url="your_qdrant_url",
collection_name="your_collection" qdrant_api_key="your_qdrant_api_key",
collection_name="your_collection"
)
) )
# Create an agent that uses the tool # Create an agent that uses the tool
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
def get_openai_embedding(text): def get_openai_embedding(text):
response = client.embeddings.create( response = client.embeddings.create(
input=text, input=text,
model="text-embedding-3-small" model="text-embedding-3-large"
) )
return response.data[0].embedding return response.data[0].embedding
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name): def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
# Extract text from PDF # Extract text from PDF
text_chunks = extract_text_from_pdf(pdf_path) text_chunks = extract_text_from_pdf(pdf_path)
# Create Qdrant collection # Create Qdrant collection
if qdrant.collection_exists(collection_name): if qdrant.collection_exists(collection_name):
qdrant.delete_collection(collection_name) qdrant.delete_collection(collection_name)
qdrant.create_collection( qdrant.create_collection(
collection_name=collection_name, collection_name=collection_name,
vectors_config=VectorParams(size=1536, distance=Distance.COSINE) vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
) )
# Store embeddings # Store embeddings
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
load_pdf_to_qdrant(pdf_path, qdrant, collection_name) load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
# Initialize Qdrant search tool # Initialize Qdrant search tool
from crewai_tools import QdrantConfig
qdrant_tool = QdrantVectorSearchTool( qdrant_tool = QdrantVectorSearchTool(
qdrant_url=os.getenv("QDRANT_URL"), qdrant_config=QdrantConfig(
qdrant_api_key=os.getenv("QDRANT_API_KEY"), qdrant_url=os.getenv("QDRANT_URL"),
collection_name=collection_name, qdrant_api_key=os.getenv("QDRANT_API_KEY"),
limit=3, collection_name=collection_name,
score_threshold=0.35 limit=3,
score_threshold=0.35
)
) )
# Create CrewAI agents # Create CrewAI agents
search_agent = Agent( search_agent = Agent(
role="Senior Semantic Search Agent", role="Senior Semantic Search Agent",
goal="Find and analyze documents based on semantic search", goal="Find and analyze documents based on semantic search",
backstory="""You are an expert research assistant who can find relevant backstory="""You are an expert research assistant who can find relevant
information using semantic search in a Qdrant database.""", information using semantic search in a Qdrant database.""",
tools=[qdrant_tool], tools=[qdrant_tool],
verbose=True verbose=True
@@ -141,7 +147,7 @@ search_agent = Agent(
answer_agent = Agent( answer_agent = Agent(
role="Senior Answer Assistant", role="Senior Answer Assistant",
goal="Generate answers to questions based on the context provided", goal="Generate answers to questions based on the context provided",
backstory="""You are an expert answer assistant who can generate backstory="""You are an expert answer assistant who can generate
answers to questions based on the context provided.""", answers to questions based on the context provided.""",
tools=[qdrant_tool], tools=[qdrant_tool],
verbose=True verbose=True
@@ -180,21 +186,82 @@ print(result)
## 도구 매개변수 ## 도구 매개변수
### 필수 파라미터 ### 필수 파라미터
- `qdrant_url` (str): Qdrant 서버의 URL - `qdrant_config` (QdrantConfig): 모든 Qdrant 설정을 포함하는 구성 객체
- `qdrant_api_key` (str): Qdrant 인증을 위한 API 키
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
### 선택적 매개변수 ### QdrantConfig 매개변수
- `qdrant_url` (str): Qdrant 서버의 URL
- `qdrant_api_key` (str, 선택 사항): Qdrant 인증을 위한 API 키
- `collection_name` (str): 검색할 Qdrant 컬렉션의 이름
- `limit` (int): 반환할 최대 결과 수 (기본값: 3) - `limit` (int): 반환할 최대 결과 수 (기본값: 3)
- `score_threshold` (float): 최소 유사도 점수 임계값 (기본값: 0.35) - `score_threshold` (float): 최소 유사도 점수 임계값 (기본값: 0.35)
- `filter` (Any, 선택 사항): 고급 필터링을 위한 Qdrant Filter 인스턴스 (기본값: None)
### 선택적 도구 매개변수
- `custom_embedding_fn` (Callable[[str], list[float]]): 텍스트 벡터화를 위한 사용자 지정 함수 - `custom_embedding_fn` (Callable[[str], list[float]]): 텍스트 벡터화를 위한 사용자 지정 함수
- `qdrant_package` (str): Qdrant의 기본 패키지 경로 (기본값: "qdrant_client")
- `client` (Any): 사전 초기화된 Qdrant 클라이언트 (선택 사항)
## 고급 필터링
QdrantVectorSearchTool은 검색 결과를 세밀하게 조정할 수 있는 강력한 필터링 기능을 지원합니다:
### 동적 필터링
검색 시 `filter_by` 및 `filter_value` 매개변수를 사용하여 즉석에서 결과를 필터링할 수 있습니다. 두 매개변수가 모두 제공된 경우에만 필터가 적용됩니다:
```python
# 에이전트는 도구를 호출할 때 이러한 매개변수를 사용합니다
# 도구 스키마는 filter_by 및 filter_value를 허용합니다
# 예시: 카테고리 필터를 사용한 검색
# 결과는 category == "기술"인 항목으로 필터링됩니다
```
### QdrantConfig를 사용한 사전 설정 필터
복잡한 필터링의 경우 구성에서 Qdrant Filter 인스턴스를 사용하세요:
```python
from qdrant_client.http import models as qmodels
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
# 특정 조건에 대한 필터 생성
preset_filter = qmodels.Filter(
must=[
qmodels.FieldCondition(
key="category",
match=qmodels.MatchValue(value="research")
),
qmodels.FieldCondition(
key="year",
match=qmodels.MatchValue(value=2024)
)
]
)
# 사전 설정 필터로 도구 초기화
qdrant_tool = QdrantVectorSearchTool(
qdrant_config=QdrantConfig(
qdrant_url="your_url",
qdrant_api_key="your_key",
collection_name="your_collection",
filter=preset_filter # 모든 검색에 적용되는 사전 설정 필터
)
)
```
### 필터 결합
도구는 `QdrantConfig`의 사전 설정 필터와 `filter_by` 및 `filter_value`의 동적 필터를 자동으로 결합합니다:
```python
# QdrantConfig에 category="research"에 대한 사전 설정 필터가 있고
# 검색에서 filter_by="year", filter_value=2024를 사용하는 경우
# 두 필터가 모두 결합됩니다 (AND 논리)
```
## 검색 매개변수 ## 검색 매개변수
이 도구는 스키마에서 다음과 같은 매개변수를 허용합니다: 이 도구는 스키마에서 다음과 같은 매개변수를 허용합니다:
- `query` (str): 유사한 문서를 찾기 위한 검색 쿼리 - `query` (str): 유사한 문서를 찾기 위한 검색 쿼리
- `filter_by` (str, 선택 사항): 필터링할 메타데이터 필드 - `filter_by` (str, 선택 사항): 필터링할 메타데이터 필드
- `filter_value` (str, 선택 사항): 필터 기준 값 - `filter_value` (Any, 선택 사항): 필터 기준 값
## 반환 형식 ## 반환 형식
@@ -214,7 +281,7 @@ print(result)
## 기본 임베딩 ## 기본 임베딩
기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-small` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다: 기본적으로, 이 도구는 벡터화를 위해 OpenAI의 `text-embedding-3-large` 모델을 사용합니다. 이를 위해서는 다음이 필요합니다:
- 환경변수에 설정된 OpenAI API 키: `OPENAI_API_KEY` - 환경변수에 설정된 OpenAI API 키: `OPENAI_API_KEY`
## 커스텀 임베딩 ## 커스텀 임베딩
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
# Tokenize and get model outputs # Tokenize and get model outputs
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True) inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
outputs = model(**inputs) outputs = model(**inputs)
# Use mean pooling to get text embedding # Use mean pooling to get text embedding
embeddings = outputs.last_hidden_state.mean(dim=1) embeddings = outputs.last_hidden_state.mean(dim=1)
# Convert to list of floats and return # Convert to list of floats and return
return embeddings[0].tolist() return embeddings[0].tolist()
# Use custom embeddings with the tool # Use custom embeddings with the tool
from crewai_tools import QdrantConfig
tool = QdrantVectorSearchTool( tool = QdrantVectorSearchTool(
qdrant_url="your_url", qdrant_config=QdrantConfig(
qdrant_api_key="your_key", qdrant_url="your_url",
collection_name="your_collection", qdrant_api_key="your_key",
collection_name="your_collection"
),
custom_embedding_fn=custom_embeddings # Pass your custom function custom_embedding_fn=custom_embeddings # Pass your custom function
) )
``` ```
@@ -270,4 +341,4 @@ tool = QdrantVectorSearchTool(
export QDRANT_URL="your_qdrant_url" # If not provided in constructor export QDRANT_URL="your_qdrant_url" # If not provided in constructor
export QDRANT_API_KEY="your_api_key" # If not provided in constructor export QDRANT_API_KEY="your_api_key" # If not provided in constructor
export OPENAI_API_KEY="your_openai_key" # If using default embeddings export OPENAI_API_KEY="your_openai_key" # If using default embeddings
``` ```

View File

@@ -23,13 +23,15 @@ Veja um exemplo mínimo de como utilizar a ferramenta:
```python ```python
from crewai import Agent from crewai import Agent
from crewai_tools import QdrantVectorSearchTool from crewai_tools import QdrantVectorSearchTool, QdrantConfig
# Inicialize a ferramenta # Inicialize a ferramenta com QdrantConfig
qdrant_tool = QdrantVectorSearchTool( qdrant_tool = QdrantVectorSearchTool(
qdrant_url="your_qdrant_url", qdrant_config=QdrantConfig(
qdrant_api_key="your_qdrant_api_key", qdrant_url="your_qdrant_url",
collection_name="your_collection" qdrant_api_key="your_qdrant_api_key",
collection_name="your_collection"
)
) )
# Crie um agente que utiliza a ferramenta # Crie um agente que utiliza a ferramenta
@@ -82,7 +84,7 @@ def extract_text_from_pdf(pdf_path):
def get_openai_embedding(text): def get_openai_embedding(text):
response = client.embeddings.create( response = client.embeddings.create(
input=text, input=text,
model="text-embedding-3-small" model="text-embedding-3-large"
) )
return response.data[0].embedding return response.data[0].embedding
@@ -90,13 +92,13 @@ def get_openai_embedding(text):
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name): def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
# Extrair texto do PDF # Extrair texto do PDF
text_chunks = extract_text_from_pdf(pdf_path) text_chunks = extract_text_from_pdf(pdf_path)
# Criar coleção no Qdrant # Criar coleção no Qdrant
if qdrant.collection_exists(collection_name): if qdrant.collection_exists(collection_name):
qdrant.delete_collection(collection_name) qdrant.delete_collection(collection_name)
qdrant.create_collection( qdrant.create_collection(
collection_name=collection_name, collection_name=collection_name,
vectors_config=VectorParams(size=1536, distance=Distance.COSINE) vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
) )
# Armazenar embeddings # Armazenar embeddings
@@ -120,19 +122,23 @@ pdf_path = "path/to/your/document.pdf"
load_pdf_to_qdrant(pdf_path, qdrant, collection_name) load_pdf_to_qdrant(pdf_path, qdrant, collection_name)
# Inicializar ferramenta de busca Qdrant # Inicializar ferramenta de busca Qdrant
from crewai_tools import QdrantConfig
qdrant_tool = QdrantVectorSearchTool( qdrant_tool = QdrantVectorSearchTool(
qdrant_url=os.getenv("QDRANT_URL"), qdrant_config=QdrantConfig(
qdrant_api_key=os.getenv("QDRANT_API_KEY"), qdrant_url=os.getenv("QDRANT_URL"),
collection_name=collection_name, qdrant_api_key=os.getenv("QDRANT_API_KEY"),
limit=3, collection_name=collection_name,
score_threshold=0.35 limit=3,
score_threshold=0.35
)
) )
# Criar agentes CrewAI # Criar agentes CrewAI
search_agent = Agent( search_agent = Agent(
role="Senior Semantic Search Agent", role="Senior Semantic Search Agent",
goal="Find and analyze documents based on semantic search", goal="Find and analyze documents based on semantic search",
backstory="""You are an expert research assistant who can find relevant backstory="""You are an expert research assistant who can find relevant
information using semantic search in a Qdrant database.""", information using semantic search in a Qdrant database.""",
tools=[qdrant_tool], tools=[qdrant_tool],
verbose=True verbose=True
@@ -141,7 +147,7 @@ search_agent = Agent(
answer_agent = Agent( answer_agent = Agent(
role="Senior Answer Assistant", role="Senior Answer Assistant",
goal="Generate answers to questions based on the context provided", goal="Generate answers to questions based on the context provided",
backstory="""You are an expert answer assistant who can generate backstory="""You are an expert answer assistant who can generate
answers to questions based on the context provided.""", answers to questions based on the context provided.""",
tools=[qdrant_tool], tools=[qdrant_tool],
verbose=True verbose=True
@@ -180,21 +186,82 @@ print(result)
## Parâmetros da Ferramenta ## Parâmetros da Ferramenta
### Parâmetros Obrigatórios ### Parâmetros Obrigatórios
- `qdrant_url` (str): URL do seu servidor Qdrant - `qdrant_config` (QdrantConfig): Objeto de configuração contendo todas as configurações do Qdrant
- `qdrant_api_key` (str): Chave de API para autenticação com o Qdrant
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
### Parâmetros Opcionais ### Parâmetros do QdrantConfig
- `qdrant_url` (str): URL do seu servidor Qdrant
- `qdrant_api_key` (str, opcional): Chave de API para autenticação com o Qdrant
- `collection_name` (str): Nome da coleção Qdrant a ser pesquisada
- `limit` (int): Número máximo de resultados a serem retornados (padrão: 3) - `limit` (int): Número máximo de resultados a serem retornados (padrão: 3)
- `score_threshold` (float): Limite mínimo de similaridade (padrão: 0.35) - `score_threshold` (float): Limite mínimo de similaridade (padrão: 0.35)
- `filter` (Any, opcional): Instância de Filter do Qdrant para filtragem avançada (padrão: None)
### Parâmetros Opcionais da Ferramenta
- `custom_embedding_fn` (Callable[[str], list[float]]): Função personalizada para vetorização de textos - `custom_embedding_fn` (Callable[[str], list[float]]): Função personalizada para vetorização de textos
- `qdrant_package` (str): Caminho base do pacote Qdrant (padrão: "qdrant_client")
- `client` (Any): Cliente Qdrant pré-inicializado (opcional)
## Filtragem Avançada
A ferramenta QdrantVectorSearchTool oferece recursos poderosos de filtragem para refinar os resultados da busca:
### Filtragem Dinâmica
Use os parâmetros `filter_by` e `filter_value` na sua busca para filtrar resultados dinamicamente; o filtro só é aplicado quando ambos são informados:
```python
# O agente usará esses parâmetros ao chamar a ferramenta
# O schema da ferramenta aceita filter_by e filter_value
# Exemplo: busca com filtro de categoria
# Os resultados serão filtrados onde categoria == "tecnologia"
```
### Filtros Pré-definidos com QdrantConfig
Para filtragens complexas, use instâncias de Filter do Qdrant na sua configuração:
```python
from qdrant_client.http import models as qmodels
from crewai_tools import QdrantVectorSearchTool, QdrantConfig
# Criar um filtro para condições específicas
preset_filter = qmodels.Filter(
must=[
qmodels.FieldCondition(
key="categoria",
match=qmodels.MatchValue(value="pesquisa")
),
qmodels.FieldCondition(
key="ano",
match=qmodels.MatchValue(value=2024)
)
]
)
# Inicializar ferramenta com filtro pré-definido
qdrant_tool = QdrantVectorSearchTool(
qdrant_config=QdrantConfig(
qdrant_url="your_url",
qdrant_api_key="your_key",
collection_name="your_collection",
filter=preset_filter # Filtro pré-definido aplicado a todas as buscas
)
)
```
### Combinando Filtros
A ferramenta combina automaticamente os filtros pré-definidos do `QdrantConfig` com os filtros dinâmicos de `filter_by` e `filter_value`:
```python
# Se QdrantConfig tem um filtro pré-definido para categoria="pesquisa"
# E a busca usa filter_by="ano", filter_value=2024
# Ambos os filtros serão combinados (lógica AND)
```
## Parâmetros de Busca ## Parâmetros de Busca
A ferramenta aceita estes parâmetros em seu schema: A ferramenta aceita estes parâmetros em seu schema:
- `query` (str): Consulta de busca para encontrar documentos similares - `query` (str): Consulta de busca para encontrar documentos similares
- `filter_by` (str, opcional): Campo de metadado para filtrar - `filter_by` (str, opcional): Campo de metadado para filtrar
- `filter_value` (str, opcional): Valor para filtrar - `filter_value` (Any, opcional): Valor para filtrar
## Formato de Retorno ## Formato de Retorno
@@ -214,7 +281,7 @@ A ferramenta retorna resultados no formato JSON:
## Embedding Padrão ## Embedding Padrão
Por padrão, a ferramenta utiliza o modelo `text-embedding-3-small` da OpenAI para vetorização. Isso requer: Por padrão, a ferramenta utiliza o modelo `text-embedding-3-large` da OpenAI para vetorização. Isso requer:
- Chave de API da OpenAI definida na variável de ambiente: `OPENAI_API_KEY` - Chave de API da OpenAI definida na variável de ambiente: `OPENAI_API_KEY`
## Embeddings Personalizados ## Embeddings Personalizados
@@ -240,18 +307,22 @@ def custom_embeddings(text: str) -> list[float]:
# Tokenizar e obter saídas do modelo # Tokenizar e obter saídas do modelo
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True) inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
outputs = model(**inputs) outputs = model(**inputs)
# Usar mean pooling para obter o embedding do texto # Usar mean pooling para obter o embedding do texto
embeddings = outputs.last_hidden_state.mean(dim=1) embeddings = outputs.last_hidden_state.mean(dim=1)
# Converter para lista de floats e retornar # Converter para lista de floats e retornar
return embeddings[0].tolist() return embeddings[0].tolist()
# Usar embeddings personalizados com a ferramenta # Usar embeddings personalizados com a ferramenta
from crewai_tools import QdrantConfig
tool = QdrantVectorSearchTool( tool = QdrantVectorSearchTool(
qdrant_url="your_url", qdrant_config=QdrantConfig(
qdrant_api_key="your_key", qdrant_url="your_url",
collection_name="your_collection", qdrant_api_key="your_key",
collection_name="your_collection"
),
custom_embedding_fn=custom_embeddings # Passe sua função personalizada custom_embedding_fn=custom_embeddings # Passe sua função personalizada
) )
``` ```
@@ -270,4 +341,4 @@ Variáveis de ambiente obrigatórias:
export QDRANT_URL="your_qdrant_url" # Se não for informado no construtor export QDRANT_URL="your_qdrant_url" # Se não for informado no construtor
export QDRANT_API_KEY="your_api_key" # Se não for informado no construtor export QDRANT_API_KEY="your_api_key" # Se não for informado no construtor
export OPENAI_API_KEY="your_openai_key" # Se estiver usando embeddings padrão export OPENAI_API_KEY="your_openai_key" # Se estiver usando embeddings padrão
``` ```

View File

@@ -1,9 +1,9 @@
from __future__ import annotations from __future__ import annotations
from collections.abc import Callable
import importlib import importlib
import json import json
import os import os
from collections.abc import Callable
from typing import Any from typing import Any
from crewai.tools import BaseTool, EnvVar from crewai.tools import BaseTool, EnvVar
@@ -12,9 +12,13 @@ from pydantic.types import ImportString
class QdrantToolSchema(BaseModel): class QdrantToolSchema(BaseModel):
query: str = Field(..., description="Query to search in Qdrant DB.") query: str = Field(..., description="Query to search in Qdrant DB")
filter_by: str | None = None filter_by: str | None = Field(
filter_value: str | None = None default=None, description="Parameter to filter the search by."
)
filter_value: Any | None = Field(
default=None, description="Value to filter the search by."
)
class QdrantConfig(BaseModel): class QdrantConfig(BaseModel):
@@ -25,7 +29,9 @@ class QdrantConfig(BaseModel):
collection_name: str collection_name: str
limit: int = 3 limit: int = 3
score_threshold: float = 0.35 score_threshold: float = 0.35
filter_conditions: list[tuple[str, Any]] = Field(default_factory=list) filter: Any | None = Field(
default=None, description="Qdrant Filter instance for advanced filtering."
)
class QdrantVectorSearchTool(BaseTool): class QdrantVectorSearchTool(BaseTool):
@@ -76,23 +82,26 @@ class QdrantVectorSearchTool(BaseTool):
filter_value: Any | None = None, filter_value: Any | None = None,
) -> str: ) -> str:
"""Perform vector similarity search.""" """Perform vector similarity search."""
filter_ = self.qdrant_package.http.models.Filter
field_condition = self.qdrant_package.http.models.FieldCondition
match_value = self.qdrant_package.http.models.MatchValue
conditions = self.qdrant_config.filter_conditions.copy()
if filter_by and filter_value is not None:
conditions.append((filter_by, filter_value))
search_filter = ( search_filter = (
filter_( self.qdrant_config.filter.model_copy()
must=[ if self.qdrant_config.filter is not None
field_condition(key=k, match=match_value(value=v)) else self.qdrant_package.http.models.Filter(must=[])
for k, v in conditions
]
)
if conditions
else None
) )
if filter_by and filter_value is not None:
if not hasattr(search_filter, "must") or not isinstance(
search_filter.must, list
):
search_filter.must = []
search_filter.must.append(
self.qdrant_package.http.models.FieldCondition(
key=filter_by,
match=self.qdrant_package.http.models.MatchValue(
value=filter_value
),
)
)
query_vector = ( query_vector = (
self.custom_embedding_fn(query) self.custom_embedding_fn(query)
if self.custom_embedding_fn if self.custom_embedding_fn