mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-22 14:48:13 +00:00
Use file lock around Chroma client initialization (#3181)
This commit fixes a bug with concurrent processess and Chroma where `table collections already exists` (and similar) were raised. https://cookbook.chromadb.dev/core/system_constraints/
This commit is contained in:
@@ -18,6 +18,7 @@ from crewai.utilities.chromadb import sanitize_collection_name
|
||||
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
||||
from crewai.utilities.logger import Logger
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
from crewai.utilities.chromadb import create_persistent_client
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
@@ -84,14 +85,11 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
||||
raise Exception("Collection not initialized")
|
||||
|
||||
def initialize_knowledge_storage(self):
|
||||
base_path = os.path.join(db_storage_path(), "knowledge")
|
||||
chroma_client = chromadb.PersistentClient(
|
||||
path=base_path,
|
||||
self.app = create_persistent_client(
|
||||
path=os.path.join(db_storage_path(), "knowledge"),
|
||||
settings=Settings(allow_reset=True),
|
||||
)
|
||||
|
||||
self.app = chroma_client
|
||||
|
||||
try:
|
||||
collection_name = (
|
||||
f"knowledge_{self.collection_name}"
|
||||
@@ -111,9 +109,8 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
||||
def reset(self):
|
||||
base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY)
|
||||
if not self.app:
|
||||
self.app = chromadb.PersistentClient(
|
||||
path=base_path,
|
||||
settings=Settings(allow_reset=True),
|
||||
self.app = create_persistent_client(
|
||||
path=base_path, settings=Settings(allow_reset=True)
|
||||
)
|
||||
|
||||
self.app.reset()
|
||||
|
||||
@@ -4,12 +4,12 @@ import logging
|
||||
import os
|
||||
import shutil
|
||||
import uuid
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from chromadb.api import ClientAPI
|
||||
|
||||
from crewai.memory.storage.base_rag_storage import BaseRAGStorage
|
||||
from crewai.utilities import EmbeddingConfigurator
|
||||
from crewai.utilities.chromadb import create_persistent_client
|
||||
from crewai.utilities.constants import MAX_FILE_NAME_LENGTH
|
||||
from crewai.utilities.paths import db_storage_path
|
||||
|
||||
@@ -60,17 +60,15 @@ class RAGStorage(BaseRAGStorage):
|
||||
self.embedder_config = configurator.configure_embedder(self.embedder_config)
|
||||
|
||||
def _initialize_app(self):
|
||||
import chromadb
|
||||
from chromadb.config import Settings
|
||||
|
||||
self._set_embedder_config()
|
||||
chroma_client = chromadb.PersistentClient(
|
||||
|
||||
self.app = create_persistent_client(
|
||||
path=self.path if self.path else self.storage_file_name,
|
||||
settings=Settings(allow_reset=self.allow_reset),
|
||||
)
|
||||
|
||||
self.app = chroma_client
|
||||
|
||||
self.collection = self.app.get_or_create_collection(
|
||||
name=self.type, embedding_function=self.embedder_config
|
||||
)
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import re
|
||||
import portalocker
|
||||
from chromadb import PersistentClient
|
||||
from hashlib import md5
|
||||
from typing import Optional
|
||||
|
||||
|
||||
MIN_COLLECTION_LENGTH = 3
|
||||
MAX_COLLECTION_LENGTH = 63
|
||||
DEFAULT_COLLECTION = "default_collection"
|
||||
@@ -60,3 +64,16 @@ def sanitize_collection_name(name: Optional[str], max_collection_length: int = M
|
||||
sanitized = sanitized[:-1] + "z"
|
||||
|
||||
return sanitized
|
||||
|
||||
|
||||
def create_persistent_client(path: str, **kwargs):
|
||||
"""
|
||||
Creates a persistent client for ChromaDB with a lock file to prevent
|
||||
concurrent creations. Works for both multi-threads and multi-processes
|
||||
environments.
|
||||
"""
|
||||
lockfile = f"chromadb-{md5(path.encode(), usedforsecurity=False).hexdigest()}.lock"
|
||||
with portalocker.Lock(lockfile):
|
||||
client = PersistentClient(path=path, **kwargs)
|
||||
|
||||
return client
|
||||
|
||||
Reference in New Issue
Block a user