# FAISS Search Tool
The FAISS Search Tool enables efficient vector similarity search using Facebook AI Similarity Search (FAISS).

## Usage

```python
from crewai import Agent
from crewai.tools import FAISSSearchTool

# Initialize tool
search_tool = FAISSSearchTool(
    index_type="L2",  # or "IP" for inner product
    dimension=384,  # Match your embedder's dimension
    embedder_config={
        "provider": "fastembed",
        "model": "BAAI/bge-small-en-v1.5"
    }
)

# Add documents (with error handling)
try:
    search_tool.add_texts([
        "Document 1 content",
        "Document 2 content",
        # ...
    ])
except ValueError as e:
    print(f"Failed to add documents: {e}")

# Add large document sets efficiently
try:
    search_tool.add_texts_batch(
        texts=["Doc 1", "Doc 2", ...],  # Large list of documents
        batch_size=1000  # Process in batches to manage memory
    )
except ValueError as e:
    print(f"Failed to add documents in batch: {e}")

# Search with error handling
try:
    results = search_tool.run(
        query="search query",
        k=3,  # Number of results
        score_threshold=0.6  # Minimum similarity score
    )
    for result in results:
        print(f"Text: {result['text']}, Score: {result['score']}")
except ValueError as e:
    print(f"Search failed: {e}")

# Create agent with tool
agent = Agent(
    role="researcher",
    goal="Find relevant information",
    tools=[search_tool]
)
```

## Configuration

| Parameter | Type | Description |
|-----------|------|-------------|
| index_type | str | FAISS index type ("L2" or "IP") |
| dimension | int | Embedding dimension |
| embedder_config | dict | Embedder configuration |

## Parameters

### index_type

- `"L2"`: Euclidean distance (default)
- `"IP"`: Inner product similarity

### dimension

Default is 384, which matches the BAAI/bge-small-en-v1.5 model. Adjust this to match your chosen embedder model's output dimension.
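
For example, switching to a larger embedding model means the index dimension must change with it. A minimal sketch, assuming the same flat `embedder_config` shape used above (BAAI/bge-base-en-v1.5 produces 768-dimensional vectors):

```python
# Sketch: the index dimension must match the embedder's output size.
# BAAI/bge-base-en-v1.5 outputs 768-dimensional embeddings, so dimension=768 here.
base_tool = FAISSSearchTool(
    index_type="L2",
    dimension=768,
    embedder_config={"provider": "fastembed", "model": "BAAI/bge-base-en-v1.5"},
)
```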

### embedder_config

Configuration for the embedding model. Supports all CrewAI embedder providers:

- fastembed (default)
- openai
- google
- ollama
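
The exact configuration keys for each provider are not documented here; the sketch below assumes the flat `{"provider", "model"}` shape from the fastembed example carries over to other providers, which may not hold for every backend. OpenAI's text-embedding-3-small model returns 1536-dimensional vectors, so the index dimension changes accordingly:

```python
# Hypothetical sketch: assumes other providers accept the same flat
# {"provider": ..., "model": ...} shape as the fastembed example above;
# verify the exact keys for your provider before relying on this.
openai_tool = FAISSSearchTool(
    index_type="IP",
    dimension=1536,  # text-embedding-3-small returns 1536-dimensional vectors
    embedder_config={
        "provider": "openai",
        "model": "text-embedding-3-small",
    },
)
```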

## Error Handling

The tool includes comprehensive error handling:

```python
# Invalid index type
try:
    tool = FAISSSearchTool(index_type="INVALID")
except ValueError as e:
    print(f"Invalid index type: {e}")

# Empty query
try:
    results = search_tool.run(query="")
except ValueError as e:
    print(f"Invalid query: {e}")  # "Query cannot be empty"

# Invalid k value
try:
    results = search_tool.run(query="test", k=0)
except ValueError as e:
    print(f"Invalid k: {e}")  # "k must be positive"

# Invalid score threshold
try:
    results = search_tool.run(query="test", score_threshold=1.5)
except ValueError as e:
    print(f"Invalid threshold: {e}")  # "score_threshold must be between 0 and 1"
```

## Performance Considerations

### Memory Management

For large document sets, use batch processing to manage memory efficiently:

```python
# Process documents in batches
search_tool.add_texts_batch(texts=large_document_list, batch_size=1000)
```

### Index Management

Monitor and manage index size:

```python
# Check the current index size
print(f"Current index size: {search_tool.index_size}")

# Check if the index is empty
if search_tool.is_empty:
    print("Index is empty")

# Clear the index if needed
search_tool.clear_index()
```

### Performance Metrics

The tool is optimized for performance:

- Search operations typically complete within 1 second for indices up to 1000 documents
- Batch processing helps manage memory for large document sets
- Input sanitization ensures query safety without significant overhead
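
Actual latency depends on your hardware, embedder, and corpus, so treat the figure above as a rough guideline. A minimal sketch for measuring search latency on your own data, using only the `add_texts`, `run`, and `index_size` members shown in this document:

```python
# Rough latency check using the API shown above.
import time

search_tool.add_texts([f"Document {i} about topic {i % 20}" for i in range(1000)])

start = time.perf_counter()
results = search_tool.run(query="topic 7", k=3)
elapsed = time.perf_counter() - start

print(f"Search over {search_tool.index_size} documents took {elapsed:.3f}s")
```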