Fix #2919: Make chromadb an optional dependency to resolve package conflicts

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2025-05-30 08:56:35 +00:00
parent 55ed91e313
commit 7b129fc847
5 changed files with 482 additions and 170 deletions

View File

@@ -21,7 +21,6 @@ dependencies = [
"opentelemetry-sdk>=1.30.0",
"opentelemetry-exporter-otlp-proto-http>=1.30.0",
# Data Handling
"chromadb>=0.5.23",
"openpyxl>=3.1.5",
"pyvis>=0.3.2",
# Authentication and Security
@@ -49,6 +48,9 @@ tools = ["crewai-tools~=0.45.0"]
embeddings = [
"tiktoken~=0.7.0"
]
storage = [
"chromadb>=0.5.23"
]
agentops = ["agentops>=0.3.0"]
fastembed = ["fastembed>=0.4.1"]
pdfplumber = [

View File

@@ -6,11 +6,19 @@ import os
import shutil
from typing import Any, Dict, List, Optional, Union
import chromadb
import chromadb.errors
from chromadb.api import ClientAPI
from chromadb.api.types import OneOrMany
from chromadb.config import Settings
try:
import chromadb
import chromadb.errors
from chromadb.api import ClientAPI
from chromadb.api.types import OneOrMany
from chromadb.config import Settings
HAS_CHROMADB = True
except ImportError:
chromadb = None # type: ignore
ClientAPI = Any # type: ignore
OneOrMany = Any # type: ignore
Settings = Any # type: ignore
HAS_CHROMADB = False
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
from crewai.utilities import EmbeddingConfigurator
@@ -62,6 +70,12 @@ class KnowledgeStorage(BaseKnowledgeStorage):
filter: Optional[dict] = None,
score_threshold: float = 0.35,
) -> List[Dict[str, Any]]:
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
with suppress_logging():
if self.collection:
fetched = self.collection.query(
@@ -84,48 +98,78 @@ class KnowledgeStorage(BaseKnowledgeStorage):
raise Exception("Collection not initialized")
def initialize_knowledge_storage(self):
base_path = os.path.join(db_storage_path(), "knowledge")
chroma_client = chromadb.PersistentClient(
path=base_path,
settings=Settings(allow_reset=True),
)
self.app = chroma_client
try:
collection_name = (
f"knowledge_{self.collection_name}"
if self.collection_name
else "knowledge"
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
if self.app:
self.collection = self.app.get_or_create_collection(
name=sanitize_collection_name(collection_name),
embedding_function=self.embedder,
)
else:
raise Exception("Vector Database Client not initialized")
except Exception:
raise Exception("Failed to create or get collection")
def reset(self):
base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY)
if not self.app:
self.app = chromadb.PersistentClient(
base_path = os.path.join(db_storage_path(), "knowledge")
try:
chroma_client = chromadb.PersistentClient(
path=base_path,
settings=Settings(allow_reset=True),
)
self.app.reset()
shutil.rmtree(base_path)
self.app = None
self.collection = None
self.app = chroma_client
try:
collection_name = (
f"knowledge_{self.collection_name}"
if self.collection_name
else "knowledge"
)
if self.app:
self.collection = self.app.get_or_create_collection(
name=sanitize_collection_name(collection_name),
embedding_function=self.embedder,
)
else:
raise Exception("Vector Database Client not initialized")
except Exception:
raise Exception("Failed to create or get collection")
except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def reset(self):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY)
try:
if not self.app:
self.app = chromadb.PersistentClient(
path=base_path,
settings=Settings(allow_reset=True),
)
self.app.reset()
shutil.rmtree(base_path)
self.app = None
self.collection = None
except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def save(
self,
documents: List[str],
metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
if not self.collection:
raise Exception("Collection not initialized")
@@ -156,7 +200,7 @@ class KnowledgeStorage(BaseKnowledgeStorage):
filtered_ids.append(doc_id)
# If we have no metadata at all, set it to None
final_metadata: Optional[OneOrMany[chromadb.Metadata]] = (
final_metadata: Optional[OneOrMany[Any]] = (
None if all(m is None for m in filtered_metadata) else filtered_metadata
)
@@ -165,29 +209,47 @@ class KnowledgeStorage(BaseKnowledgeStorage):
metadatas=final_metadata,
ids=filtered_ids,
)
except chromadb.errors.InvalidDimensionException as e:
Logger(verbose=True).log(
"error",
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
"red",
except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
raise ValueError(
"Embedding dimension mismatch. Make sure you're using the same embedding model "
"across all operations with this collection."
"Try resetting the collection using `crewai reset-memories -a`"
) from e
except Exception as e:
Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red")
raise
if HAS_CHROMADB and isinstance(e, chromadb.errors.InvalidDimensionException):
Logger(verbose=True).log(
"error",
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
"red",
)
raise ValueError(
"Embedding dimension mismatch. Make sure you're using the same embedding model "
"across all operations with this collection."
"Try resetting the collection using `crewai reset-memories -a`"
) from e
else:
Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red")
raise
def _create_default_embedding_function(self):
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
)
return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
)
except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None:
"""Set the embedding configuration for the knowledge storage.

View File

@@ -4,9 +4,14 @@ import logging
import os
import shutil
import uuid
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Union
from chromadb.api import ClientAPI
try:
from chromadb.api import ClientAPI
HAS_CHROMADB = True
except ImportError:
ClientAPI = Any # type: ignore
HAS_CHROMADB = False
from crewai.memory.storage.base_rag_storage import BaseRAGStorage
from crewai.utilities import EmbeddingConfigurator
@@ -60,24 +65,36 @@ class RAGStorage(BaseRAGStorage):
self.embedder_config = configurator.configure_embedder(self.embedder_config)
def _initialize_app(self):
import chromadb
from chromadb.config import Settings
self._set_embedder_config()
chroma_client = chromadb.PersistentClient(
path=self.path if self.path else self.storage_file_name,
settings=Settings(allow_reset=self.allow_reset),
)
self.app = chroma_client
try:
self.collection = self.app.get_collection(
name=self.type, embedding_function=self.embedder_config
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)
except Exception:
self.collection = self.app.create_collection(
name=self.type, embedding_function=self.embedder_config
try:
import chromadb
from chromadb.config import Settings
self._set_embedder_config()
chroma_client = chromadb.PersistentClient(
path=self.path if self.path else self.storage_file_name,
settings=Settings(allow_reset=self.allow_reset),
)
self.app = chroma_client
try:
self.collection = self.app.get_collection(
name=self.type, embedding_function=self.embedder_config
)
except Exception:
self.collection = self.app.create_collection(
name=self.type, embedding_function=self.embedder_config
)
except ImportError:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def _sanitize_role(self, role: str) -> str:
@@ -165,10 +182,22 @@ class RAGStorage(BaseRAGStorage):
)
def _create_default_embedding_function(self):
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
)
return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
)
except ImportError:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)

View File

@@ -1,8 +1,21 @@
import os
from typing import Any, Dict, Optional, cast
from typing import Any, Dict, Optional, Union, cast, Protocol
from chromadb import Documents, EmbeddingFunction, Embeddings
from chromadb.api.types import validate_embedding_function
try:
from chromadb import Documents, EmbeddingFunction, Embeddings
from chromadb.api.types import validate_embedding_function
HAS_CHROMADB = True
except ImportError:
HAS_CHROMADB = False
class EmbeddingFunction(Protocol):
def __call__(self, input: Any) -> Any: ...
Documents = Any
Embeddings = Any
def validate_embedding_function(func: Any) -> None:
pass
class EmbeddingConfigurator:
@@ -26,6 +39,12 @@ class EmbeddingConfigurator:
embedder_config: Optional[Dict[str, Any]] = None,
) -> EmbeddingFunction:
"""Configures and returns an embedding function based on the provided config."""
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
if embedder_config is None:
return self._create_default_embedding_function()
@@ -47,129 +66,249 @@ class EmbeddingConfigurator:
@staticmethod
def _create_default_embedding_function():
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
)
return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_openai(config, model_name):
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
return OpenAIEmbeddingFunction(
api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"),
model_name=model_name,
api_base=config.get("api_base", None),
api_type=config.get("api_type", None),
api_version=config.get("api_version", None),
default_headers=config.get("default_headers", None),
dimensions=config.get("dimensions", None),
deployment_id=config.get("deployment_id", None),
organization_id=config.get("organization_id", None),
)
return OpenAIEmbeddingFunction(
api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"),
model_name=model_name,
api_base=config.get("api_base", None),
api_type=config.get("api_type", None),
api_version=config.get("api_version", None),
default_headers=config.get("default_headers", None),
dimensions=config.get("dimensions", None),
deployment_id=config.get("deployment_id", None),
organization_id=config.get("organization_id", None),
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_azure(config, model_name):
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
return OpenAIEmbeddingFunction(
api_key=config.get("api_key"),
api_base=config.get("api_base"),
api_type=config.get("api_type", "azure"),
api_version=config.get("api_version"),
model_name=model_name,
default_headers=config.get("default_headers"),
dimensions=config.get("dimensions"),
deployment_id=config.get("deployment_id"),
organization_id=config.get("organization_id"),
)
return OpenAIEmbeddingFunction(
api_key=config.get("api_key"),
api_base=config.get("api_base"),
api_type=config.get("api_type", "azure"),
api_version=config.get("api_version"),
model_name=model_name,
default_headers=config.get("default_headers"),
dimensions=config.get("dimensions"),
deployment_id=config.get("deployment_id"),
organization_id=config.get("organization_id"),
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_ollama(config, model_name):
from chromadb.utils.embedding_functions.ollama_embedding_function import (
OllamaEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.ollama_embedding_function import (
OllamaEmbeddingFunction,
)
return OllamaEmbeddingFunction(
url=config.get("url", "http://localhost:11434/api/embeddings"),
model_name=model_name,
)
return OllamaEmbeddingFunction(
url=config.get("url", "http://localhost:11434/api/embeddings"),
model_name=model_name,
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_vertexai(config, model_name):
from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleVertexEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleVertexEmbeddingFunction,
)
return GoogleVertexEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
project_id=config.get("project_id"),
region=config.get("region"),
)
return GoogleVertexEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
project_id=config.get("project_id"),
region=config.get("region"),
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_google(config, model_name):
from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleGenerativeAiEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleGenerativeAiEmbeddingFunction,
)
return GoogleGenerativeAiEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
task_type=config.get("task_type"),
)
return GoogleGenerativeAiEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
task_type=config.get("task_type"),
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_cohere(config, model_name):
from chromadb.utils.embedding_functions.cohere_embedding_function import (
CohereEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.cohere_embedding_function import (
CohereEmbeddingFunction,
)
return CohereEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
)
return CohereEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_voyageai(config, model_name):
from chromadb.utils.embedding_functions.voyageai_embedding_function import (
VoyageAIEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.voyageai_embedding_function import (
VoyageAIEmbeddingFunction,
)
return VoyageAIEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
)
return VoyageAIEmbeddingFunction(
model_name=model_name,
api_key=config.get("api_key"),
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_bedrock(config, model_name):
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
AmazonBedrockEmbeddingFunction,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
AmazonBedrockEmbeddingFunction,
)
# Allow custom model_name override with backwards compatibility
kwargs = {"session": config.get("session")}
if model_name is not None:
kwargs["model_name"] = model_name
return AmazonBedrockEmbeddingFunction(**kwargs)
# Allow custom model_name override with backwards compatibility
kwargs = {"session": config.get("session")}
if model_name is not None:
kwargs["model_name"] = model_name
return AmazonBedrockEmbeddingFunction(**kwargs)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_huggingface(config, model_name):
from chromadb.utils.embedding_functions.huggingface_embedding_function import (
HuggingFaceEmbeddingServer,
)
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.huggingface_embedding_function import (
HuggingFaceEmbeddingServer,
)
return HuggingFaceEmbeddingServer(
url=config.get("api_url"),
)
return HuggingFaceEmbeddingServer(
url=config.get("api_url"),
)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod
def _configure_watson(config, model_name):
@@ -212,6 +351,12 @@ class EmbeddingConfigurator:
@staticmethod
def _configure_custom(config):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
custom_embedder = config.get("embedder")
if isinstance(custom_embedder, EmbeddingFunction):
try:

View File

@@ -0,0 +1,74 @@
import pytest
import importlib
import sys
from unittest.mock import patch
def test_import_without_chromadb():
    """Test that crewai can be imported and used without chromadb.

    Every cached ``chromadb`` entry in ``sys.modules`` is replaced with
    ``None`` so that importing it fails, the crewai modules that import
    chromadb optionally are reloaded under that patch, and they are reloaded
    once more afterwards so their patched state does not leak into other
    tests.
    """
    optional_chromadb_modules = (
        "crewai.memory.storage.rag_storage",
        "crewai.knowledge.storage.knowledge_storage",
        "crewai.utilities.embedding_configurator",
    )
    # An already-imported submodule (e.g. ``chromadb.api``) is served straight
    # from ``sys.modules`` without consulting its parent package, so blocking
    # only the top-level "chromadb" key is not enough.
    blocked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    blocked["chromadb"] = None

    try:
        with patch.dict(sys.modules, blocked):
            for module_name in optional_chromadb_modules:
                if module_name in sys.modules:
                    importlib.reload(sys.modules[module_name])

            import crewai  # noqa: F401  (the import itself is under test)
            from crewai import Agent, Crew, Process, Task

            # Building the objects is the check: none of this may require
            # chromadb at import or construction time.
            agent = Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")
            task = Task(description="Test Task", agent=agent)
            Crew(agents=[agent], tasks=[task], process=Process.sequential)
    finally:
        # ``importlib.reload`` re-executes the module in place, so the module
        # objects still carry the chromadb-less state after the patch is
        # undone. Re-execute them against the restored ``sys.modules``.
        for module_name in optional_chromadb_modules:
            if module_name in sys.modules:
                importlib.reload(sys.modules[module_name])
def test_memory_storage_without_chromadb():
    """Test that memory storage raises an ImportError when chromadb is missing.

    The RAG storage module is reloaded while every cached ``chromadb`` entry
    in ``sys.modules`` is blocked, and reloaded again afterwards so the
    patched ``HAS_CHROMADB`` value does not leak into other tests.
    """
    module_name = "crewai.memory.storage.rag_storage"
    # The storage module imports ``chromadb.api`` directly. A cached submodule
    # is returned from ``sys.modules`` without looking at its parent package,
    # so each cached ``chromadb.*`` entry has to be blocked as well.
    blocked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    blocked["chromadb"] = None

    try:
        with patch.dict(sys.modules, blocked):
            if module_name in sys.modules:
                importlib.reload(sys.modules[module_name])

            from crewai.memory.storage.rag_storage import HAS_CHROMADB, RAGStorage

            assert not HAS_CHROMADB

            # NOTE(review): the constructor signature is not visible here;
            # confirm it can be called without arguments so that the
            # ImportError path (not a TypeError) is what gets exercised.
            with pytest.raises(ImportError) as excinfo:
                storage = RAGStorage()
                storage._initialize_app()

        # Checked after the ``raises`` block: statements following the raising
        # call inside that block would never run.
        assert "ChromaDB is required for memory storage features" in str(excinfo.value)
    finally:
        # Re-execute the module against the restored ``sys.modules``.
        if module_name in sys.modules:
            importlib.reload(sys.modules[module_name])
def test_knowledge_storage_without_chromadb():
    """Test that knowledge storage raises an ImportError when chromadb is missing.

    The knowledge storage module is reloaded while every cached ``chromadb``
    entry in ``sys.modules`` is blocked, and reloaded again afterwards so the
    patched ``HAS_CHROMADB`` value does not leak into other tests.
    """
    module_name = "crewai.knowledge.storage.knowledge_storage"
    # Block cached submodules too: they are served from ``sys.modules``
    # without consulting the (blocked) parent package.
    blocked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    blocked["chromadb"] = None

    try:
        with patch.dict(sys.modules, blocked):
            if module_name in sys.modules:
                importlib.reload(sys.modules[module_name])

            from crewai.knowledge.storage.knowledge_storage import (
                HAS_CHROMADB,
                KnowledgeStorage,
            )

            assert not HAS_CHROMADB

            # NOTE(review): construction is kept inside the ``raises`` block
            # as in the original; whether the constructor or the explicit
            # initialize call raises is not visible from this file.
            with pytest.raises(ImportError) as excinfo:
                storage = KnowledgeStorage()
                storage.initialize_knowledge_storage()

        # Checked after the ``raises`` block so the assertion actually runs.
        assert "ChromaDB is required for knowledge storage features" in str(excinfo.value)
    finally:
        # Re-execute the module against the restored ``sys.modules``.
        if module_name in sys.modules:
            importlib.reload(sys.modules[module_name])
def test_embedding_configurator_without_chromadb():
    """Test that the embedding configurator raises an ImportError without chromadb.

    The configurator module is reloaded while every cached ``chromadb`` entry
    in ``sys.modules`` is blocked, and reloaded again afterwards so the
    patched ``HAS_CHROMADB`` value does not leak into other tests.
    """
    module_name = "crewai.utilities.embedding_configurator"
    # Block cached submodules too: they are served from ``sys.modules``
    # without consulting the (blocked) parent package.
    blocked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    blocked["chromadb"] = None

    try:
        with patch.dict(sys.modules, blocked):
            if module_name in sys.modules:
                importlib.reload(sys.modules[module_name])

            from crewai.utilities.embedding_configurator import (
                HAS_CHROMADB,
                EmbeddingConfigurator,
            )

            assert not HAS_CHROMADB

            with pytest.raises(ImportError) as excinfo:
                configurator = EmbeddingConfigurator()
                configurator.configure_embedder()

        # Checked after the ``raises`` block so the assertion actually runs.
        assert "ChromaDB is required for embedding functionality" in str(excinfo.value)
    finally:
        # Re-execute the module against the restored ``sys.modules``.
        if module_name in sys.modules:
            importlib.reload(sys.modules[module_name])