mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 16:48:30 +00:00
refactor: Address review feedback
- Add comprehensive error handling - Add input validation and sanitization - Add memory management features - Add performance testing - Add logging integration - Improve documentation with examples - Update dependency version range Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
@@ -5,25 +5,50 @@ The FAISS Search Tool enables efficient vector similarity search using Facebook
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from typing import List, Dict, Any
|
||||
from crewai import Agent
|
||||
from crewai.tools import FAISSSearchTool
|
||||
|
||||
# Initialize tool
|
||||
search_tool = FAISSSearchTool(
|
||||
index_type="L2", # or "IP" for inner product
|
||||
dimension=384, # Match your embedder's dimension
|
||||
embedder_config={
|
||||
index_type="L2",  # or "IP" for inner product
|
||||
dimension=384,  # Match your embedder's dimension
|
||||
embedder_config={
|
||||
"provider": "fastembed",
|
||||
"model": "BAAI/bge-small-en-v1.5"
|
||||
}
|
||||
)
|
||||
|
||||
# Add documents
|
||||
search_tool.add_texts([
|
||||
"Document 1 content",
|
||||
"Document 2 content",
|
||||
# ...
|
||||
])
|
||||
# Add documents (with error handling)
|
||||
try:
|
||||
search_tool.add_texts([
|
||||
"Document 1 content",
|
||||
"Document 2 content",
|
||||
# ...
|
||||
])
|
||||
except ValueError as e:
|
||||
print(f"Failed to add documents: {e}")
|
||||
|
||||
# Add large document sets efficiently
|
||||
try:
|
||||
search_tool.add_texts_batch(
|
||||
texts=["Doc 1", "Doc 2", ...], # Large list of documents
|
||||
batch_size=1000 # Process in batches to manage memory
|
||||
)
|
||||
except ValueError as e:
|
||||
print(f"Failed to add documents in batch: {e}")
|
||||
|
||||
# Search with error handling
|
||||
try:
|
||||
results = search_tool.run(
|
||||
query="search query",
|
||||
k=3, # Number of results
|
||||
score_threshold=0.6 # Minimum similarity score
|
||||
)
|
||||
for result in results:
|
||||
print(f"Text: {result['text']}, Score: {result['score']}")
|
||||
except ValueError as e:
|
||||
print(f"Search failed: {e}")
|
||||
|
||||
# Create agent with tool
|
||||
agent = Agent(
|
||||
@@ -56,3 +81,62 @@ Configuration for the embedding model. Supports all CrewAI embedder providers:
|
||||
- openai
|
||||
- google
|
||||
- ollama
|
||||
|
||||
## Error Handling
|
||||
|
||||
The tool includes comprehensive error handling:
|
||||
|
||||
```python
|
||||
# Invalid index type
|
||||
try:
|
||||
tool = FAISSSearchTool(index_type="INVALID")
|
||||
except ValueError as e:
|
||||
print(f"Invalid index type: {e}")
|
||||
|
||||
# Empty query
|
||||
try:
|
||||
results = tool.run(query="")
|
||||
except ValueError as e:
|
||||
print(f"Invalid query: {e}") # "Query cannot be empty"
|
||||
|
||||
# Invalid k value
|
||||
try:
|
||||
results = tool.run(query="test", k=0)
|
||||
except ValueError as e:
|
||||
print(f"Invalid k: {e}") # "k must be positive"
|
||||
|
||||
# Invalid score threshold
|
||||
try:
|
||||
results = tool.run(query="test", score_threshold=1.5)
|
||||
except ValueError as e:
|
||||
print(f"Invalid threshold: {e}") # "score_threshold must be between 0 and 1"
|
||||
```
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Memory Management
|
||||
For large document sets, use batch processing to manage memory efficiently:
|
||||
```python
|
||||
# Process documents in batches
|
||||
tool.add_texts_batch(texts=large_document_list, batch_size=1000)
|
||||
```
|
||||
|
||||
### Index Management
|
||||
Monitor and manage index size:
|
||||
```python
|
||||
# Check index size
|
||||
print(f"Current index size: {tool.index_size}")
|
||||
|
||||
# Check if index is empty
|
||||
if tool.is_empty:
|
||||
print("Index is empty")
|
||||
|
||||
# Clear index if needed
|
||||
tool.clear_index()
|
||||
```
|
||||
|
||||
### Performance Metrics
|
||||
The tool is optimized for performance:
|
||||
- Search operations typically complete within 1 second for indices up to 1000 documents
|
||||
- Batch processing helps manage memory for large document sets
|
||||
- Input sanitization ensures query safety without significant overhead
|
||||
|
||||
Reference in New Issue
Block a user