Fix CI: Make pgvector an optional dependency, fix SQL injection and type errors

Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
Devin AI
2025-05-22 10:15:58 +00:00
parent 30486acb4d
commit f1a91c506b
4 changed files with 41 additions and 16 deletions

View File

@@ -67,6 +67,11 @@ docling = [
aisuite = [
"aisuite>=0.1.10",
]
pgvector = [
"pgvector>=0.2.0",
"sqlalchemy>=2.0.0",
"psycopg2-binary>=2.9.0",
]
[tool.uv]
dev-dependencies = [

View File

@@ -1 +1,5 @@
from crewai.knowledge.storage.pgvector_knowledge_storage import PGVectorKnowledgeStorage
try:
from crewai.knowledge.storage.pgvector_knowledge_storage import PGVectorKnowledgeStorage
__all__ = ["PGVectorKnowledgeStorage"]
except ImportError:
__all__ = []

View File

@@ -2,24 +2,34 @@ from typing import Any, Dict, List, Optional
import hashlib
import logging
import os
from sqlalchemy import create_engine, Column, String, Text, Float
from sqlalchemy import create_engine, Column, String, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from pgvector.sqlalchemy import Vector
from sqlalchemy.sql import text
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
from crewai.utilities import EmbeddingConfigurator
try:
from pgvector.sqlalchemy import Vector
HAS_PGVECTOR = True
except ImportError:
HAS_PGVECTOR = False
class VectorType:
def __init__(self, dimensions: int):
self.dimensions = dimensions
Vector = VectorType # type: ignore
Base = declarative_base()
class Document(Base):
class Document(Base): # type: ignore
"""SQLAlchemy model for document storage with pgvector."""
__tablename__ = "documents"
id = Column(String, primary_key=True)
content = Column(Text)
metadata = Column(Text) # JSON serialized metadata
embedding = Column(Vector(1536)) # Adjust dimension based on embedding model
embedding: Column = Column(Vector(1536)) # Adjust dimension based on embedding model
class PGVectorKnowledgeStorage(BaseKnowledgeStorage):
"""
@@ -45,6 +55,11 @@ class PGVectorKnowledgeStorage(BaseKnowledgeStorage):
table_name: Name of the table to store documents
embedding_dimension: Dimension of the embedding vectors
"""
if not HAS_PGVECTOR:
raise ImportError(
"pgvector is not installed. Please install it with: pip install pgvector"
)
self.connection_string = connection_string
self.table_name = table_name
self.embedding_dimension = embedding_dimension
@@ -94,14 +109,17 @@ class PGVectorKnowledgeStorage(BaseKnowledgeStorage):
try:
query_embedding = self.embedder([query[0]])[0]
sql_query = f"""
SELECT id, content, metadata, 1 - (embedding <=> '{query_embedding}') as similarity
sql_query = text(f"""
SELECT id, content, metadata, 1 - (embedding <=> :query_embedding) as similarity
FROM {self.table_name}
ORDER BY embedding <=> '{query_embedding}'
LIMIT {limit}
"""
ORDER BY embedding <=> :query_embedding
LIMIT :limit
""")
results = session.execute(sql_query).fetchall()
results = session.execute(
sql_query,
{"query_embedding": query_embedding, "limit": limit}
).fetchall()
formatted_results = []
for row in results:
@@ -154,9 +172,9 @@ class PGVectorKnowledgeStorage(BaseKnowledgeStorage):
existing = session.query(Document).filter(Document.id == doc_id).first()
if existing:
existing.content = doc
existing.metadata = str(meta) if meta else None
existing.embedding = embedding
setattr(existing, "content", doc)
setattr(existing, "metadata", str(meta) if meta else None)
setattr(existing, "embedding", embedding)
else:
new_doc = Document(
id=doc_id,

View File

@@ -1,6 +1,4 @@
import os
import pytest
from typing import Dict, Any, List
from unittest.mock import patch, MagicMock
from crewai.knowledge.storage.pgvector_knowledge_storage import PGVectorKnowledgeStorage