From a6e4d35bb998c18fdc69d9b48a114eafc3247573 Mon Sep 17 00:00:00 2001 From: Greyson Lalonde Date: Thu, 12 Mar 2026 22:23:13 -0400 Subject: [PATCH] perf: move embedding calls outside cross-process lock in RAG adapter --- lib/crewai-tools/src/crewai_tools/rag/core.py | 80 +++++++++---------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/rag/core.py b/lib/crewai-tools/src/crewai_tools/rag/core.py index e353ead4d..d8bc51e15 100644 --- a/lib/crewai-tools/src/crewai_tools/rag/core.py +++ b/lib/crewai-tools/src/crewai_tools/rag/core.py @@ -99,6 +99,45 @@ class RAG(Adapter): loader_result = loader.load(source_content) doc_id = loader_result.doc_id + chunks = chunker.chunk(loader_result.content) + documents = [] + for i, chunk in enumerate(chunks): + doc_metadata = (metadata or {}).copy() + doc_metadata["chunk_index"] = i + documents.append( + Document( + id=compute_sha256(chunk), + content=chunk, + metadata=doc_metadata, + data_type=data_type, + source=loader_result.source, + ) + ) + + if not documents: + logger.warning("No documents to add") + return + + contents = [doc.content for doc in documents] + try: + embeddings = self._embedding_service.embed_batch(contents) + except Exception as e: + logger.error(f"Failed to generate embeddings: {e}") + return + + ids = [doc.id for doc in documents] + metadatas = [] + for doc in documents: + doc_metadata = doc.metadata.copy() + doc_metadata.update( + { + "data_type": doc.data_type.value, + "source": doc.source, + "doc_id": doc_id, + } + ) + metadatas.append(doc_metadata) + with store_lock(self._lock_name): existing_doc = self._collection.get( where={"source": source_content.source_ref}, limit=1 @@ -119,47 +158,6 @@ class RAG(Adapter): logger.warning(f"Deleting old document with doc_id {existing_doc_id}") self._collection.delete(where={"doc_id": existing_doc_id}) - documents = [] - - chunks = chunker.chunk(loader_result.content) - for i, chunk in enumerate(chunks): - doc_metadata = (metadata or {}).copy() - doc_metadata["chunk_index"] = i - documents.append( - Document( - id=compute_sha256(chunk), - content=chunk, - metadata=doc_metadata, - data_type=data_type, - source=loader_result.source, - ) - ) - - if not documents: - logger.warning("No documents to add") - return - - contents = [doc.content for doc in documents] - try: - embeddings = self._embedding_service.embed_batch(contents) - except Exception as e: - logger.error(f"Failed to generate embeddings: {e}") - return - - ids = [doc.id for doc in documents] - metadatas = [] - - for doc in documents: - doc_metadata = doc.metadata.copy() - doc_metadata.update( - { - "data_type": doc.data_type.value, - "source": doc.source, - "doc_id": doc_id, - } - ) - metadatas.append(doc_metadata) - try: self._collection.add( ids=ids,