Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-07-02 13:48:09 +00:00)
perf: move embedding calls outside cross-process lock in RAG adapter
This commit is contained in:
@@ -99,6 +99,45 @@ class RAG(Adapter):
|
||||
loader_result = loader.load(source_content)
|
||||
doc_id = loader_result.doc_id
|
||||
|
||||
chunks = chunker.chunk(loader_result.content)
|
||||
documents = []
|
||||
for i, chunk in enumerate(chunks):
|
||||
doc_metadata = (metadata or {}).copy()
|
||||
doc_metadata["chunk_index"] = i
|
||||
documents.append(
|
||||
Document(
|
||||
id=compute_sha256(chunk),
|
||||
content=chunk,
|
||||
metadata=doc_metadata,
|
||||
data_type=data_type,
|
||||
source=loader_result.source,
|
||||
)
|
||||
)
|
||||
|
||||
if not documents:
|
||||
logger.warning("No documents to add")
|
||||
return
|
||||
|
||||
contents = [doc.content for doc in documents]
|
||||
try:
|
||||
embeddings = self._embedding_service.embed_batch(contents)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to generate embeddings: {e}")
|
||||
return
|
||||
|
||||
ids = [doc.id for doc in documents]
|
||||
metadatas = []
|
||||
for doc in documents:
|
||||
doc_metadata = doc.metadata.copy()
|
||||
doc_metadata.update(
|
||||
{
|
||||
"data_type": doc.data_type.value,
|
||||
"source": doc.source,
|
||||
"doc_id": doc_id,
|
||||
}
|
||||
)
|
||||
metadatas.append(doc_metadata)
|
||||
|
||||
with store_lock(self._lock_name):
|
||||
existing_doc = self._collection.get(
|
||||
where={"source": source_content.source_ref}, limit=1
|
||||
@@ -119,47 +158,6 @@ class RAG(Adapter):
|
||||
logger.warning(f"Deleting old document with doc_id {existing_doc_id}")
|
||||
self._collection.delete(where={"doc_id": existing_doc_id})
|
||||
|
||||
documents = []
|
||||
|
||||
chunks = chunker.chunk(loader_result.content)
|
||||
for i, chunk in enumerate(chunks):
|
||||
doc_metadata = (metadata or {}).copy()
|
||||
doc_metadata["chunk_index"] = i
|
||||
documents.append(
|
||||
Document(
|
||||
id=compute_sha256(chunk),
|
||||
content=chunk,
|
||||
metadata=doc_metadata,
|
||||
data_type=data_type,
|
||||
source=loader_result.source,
|
||||
)
|
||||
)
|
||||
|
||||
if not documents:
|
||||
logger.warning("No documents to add")
|
||||
return
|
||||
|
||||
contents = [doc.content for doc in documents]
|
||||
try:
|
||||
embeddings = self._embedding_service.embed_batch(contents)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to generate embeddings: {e}")
|
||||
return
|
||||
|
||||
ids = [doc.id for doc in documents]
|
||||
metadatas = []
|
||||
|
||||
for doc in documents:
|
||||
doc_metadata = doc.metadata.copy()
|
||||
doc_metadata.update(
|
||||
{
|
||||
"data_type": doc.data_type.value,
|
||||
"source": doc.source,
|
||||
"doc_id": doc_id,
|
||||
}
|
||||
)
|
||||
metadatas.append(doc_metadata)
|
||||
|
||||
try:
|
||||
self._collection.add(
|
||||
ids=ids,
|
||||
|
||||
Reference in New Issue
Block a user