Fix #2919: Make chromadb an optional dependency to resolve package conflicts

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2025-05-30 08:56:35 +00:00
parent 55ed91e313
commit 7b129fc847
5 changed files with 482 additions and 170 deletions

View File

@@ -21,7 +21,6 @@ dependencies = [
"opentelemetry-sdk>=1.30.0", "opentelemetry-sdk>=1.30.0",
"opentelemetry-exporter-otlp-proto-http>=1.30.0", "opentelemetry-exporter-otlp-proto-http>=1.30.0",
# Data Handling # Data Handling
"chromadb>=0.5.23",
"openpyxl>=3.1.5", "openpyxl>=3.1.5",
"pyvis>=0.3.2", "pyvis>=0.3.2",
# Authentication and Security # Authentication and Security
@@ -49,6 +48,9 @@ tools = ["crewai-tools~=0.45.0"]
embeddings = [ embeddings = [
"tiktoken~=0.7.0" "tiktoken~=0.7.0"
] ]
storage = [
"chromadb>=0.5.23"
]
agentops = ["agentops>=0.3.0"] agentops = ["agentops>=0.3.0"]
fastembed = ["fastembed>=0.4.1"] fastembed = ["fastembed>=0.4.1"]
pdfplumber = [ pdfplumber = [

View File

@@ -6,11 +6,19 @@ import os
import shutil import shutil
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
try:
import chromadb import chromadb
import chromadb.errors import chromadb.errors
from chromadb.api import ClientAPI from chromadb.api import ClientAPI
from chromadb.api.types import OneOrMany from chromadb.api.types import OneOrMany
from chromadb.config import Settings from chromadb.config import Settings
HAS_CHROMADB = True
except ImportError:
chromadb = None # type: ignore
ClientAPI = Any # type: ignore
OneOrMany = Any # type: ignore
Settings = Any # type: ignore
HAS_CHROMADB = False
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
from crewai.utilities import EmbeddingConfigurator from crewai.utilities import EmbeddingConfigurator
@@ -62,6 +70,12 @@ class KnowledgeStorage(BaseKnowledgeStorage):
filter: Optional[dict] = None, filter: Optional[dict] = None,
score_threshold: float = 0.35, score_threshold: float = 0.35,
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
with suppress_logging(): with suppress_logging():
if self.collection: if self.collection:
fetched = self.collection.query( fetched = self.collection.query(
@@ -84,7 +98,14 @@ class KnowledgeStorage(BaseKnowledgeStorage):
raise Exception("Collection not initialized") raise Exception("Collection not initialized")
def initialize_knowledge_storage(self): def initialize_knowledge_storage(self):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
base_path = os.path.join(db_storage_path(), "knowledge") base_path = os.path.join(db_storage_path(), "knowledge")
try:
chroma_client = chromadb.PersistentClient( chroma_client = chromadb.PersistentClient(
path=base_path, path=base_path,
settings=Settings(allow_reset=True), settings=Settings(allow_reset=True),
@@ -107,9 +128,21 @@ class KnowledgeStorage(BaseKnowledgeStorage):
raise Exception("Vector Database Client not initialized") raise Exception("Vector Database Client not initialized")
except Exception: except Exception:
raise Exception("Failed to create or get collection") raise Exception("Failed to create or get collection")
except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def reset(self): def reset(self):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY) base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY)
try:
if not self.app: if not self.app:
self.app = chromadb.PersistentClient( self.app = chromadb.PersistentClient(
path=base_path, path=base_path,
@@ -120,12 +153,23 @@ class KnowledgeStorage(BaseKnowledgeStorage):
shutil.rmtree(base_path) shutil.rmtree(base_path)
self.app = None self.app = None
self.collection = None self.collection = None
except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def save( def save(
self, self,
documents: List[str], documents: List[str],
metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None, metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
): ):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
if not self.collection: if not self.collection:
raise Exception("Collection not initialized") raise Exception("Collection not initialized")
@@ -156,7 +200,7 @@ class KnowledgeStorage(BaseKnowledgeStorage):
filtered_ids.append(doc_id) filtered_ids.append(doc_id)
# If we have no metadata at all, set it to None # If we have no metadata at all, set it to None
final_metadata: Optional[OneOrMany[chromadb.Metadata]] = ( final_metadata: Optional[OneOrMany[Any]] = (
None if all(m is None for m in filtered_metadata) else filtered_metadata None if all(m is None for m in filtered_metadata) else filtered_metadata
) )
@@ -165,7 +209,13 @@ class KnowledgeStorage(BaseKnowledgeStorage):
metadatas=final_metadata, metadatas=final_metadata,
ids=filtered_ids, ids=filtered_ids,
) )
except chromadb.errors.InvalidDimensionException as e: except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
except Exception as e:
if HAS_CHROMADB and isinstance(e, chromadb.errors.InvalidDimensionException):
Logger(verbose=True).log( Logger(verbose=True).log(
"error", "error",
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`", "Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
@@ -176,11 +226,18 @@ class KnowledgeStorage(BaseKnowledgeStorage):
"across all operations with this collection." "across all operations with this collection."
"Try resetting the collection using `crewai reset-memories -a`" "Try resetting the collection using `crewai reset-memories -a`"
) from e ) from e
except Exception as e: else:
Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red") Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red")
raise raise
def _create_default_embedding_function(self): def _create_default_embedding_function(self):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import ( from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction, OpenAIEmbeddingFunction,
) )
@@ -188,6 +245,11 @@ class KnowledgeStorage(BaseKnowledgeStorage):
return OpenAIEmbeddingFunction( return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
) )
except ImportError:
raise ImportError(
"ChromaDB is required for knowledge storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None: def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None:
"""Set the embedding configuration for the knowledge storage. """Set the embedding configuration for the knowledge storage.

View File

@@ -4,9 +4,14 @@ import logging
import os import os
import shutil import shutil
import uuid import uuid
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional, Union
try:
from chromadb.api import ClientAPI from chromadb.api import ClientAPI
HAS_CHROMADB = True
except ImportError:
ClientAPI = Any # type: ignore
HAS_CHROMADB = False
from crewai.memory.storage.base_rag_storage import BaseRAGStorage from crewai.memory.storage.base_rag_storage import BaseRAGStorage
from crewai.utilities import EmbeddingConfigurator from crewai.utilities import EmbeddingConfigurator
@@ -60,6 +65,13 @@ class RAGStorage(BaseRAGStorage):
self.embedder_config = configurator.configure_embedder(self.embedder_config) self.embedder_config = configurator.configure_embedder(self.embedder_config)
def _initialize_app(self): def _initialize_app(self):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)
try:
import chromadb import chromadb
from chromadb.config import Settings from chromadb.config import Settings
@@ -79,6 +91,11 @@ class RAGStorage(BaseRAGStorage):
self.collection = self.app.create_collection( self.collection = self.app.create_collection(
name=self.type, embedding_function=self.embedder_config name=self.type, embedding_function=self.embedder_config
) )
except ImportError:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)
def _sanitize_role(self, role: str) -> str: def _sanitize_role(self, role: str) -> str:
""" """
@@ -165,6 +182,13 @@ class RAGStorage(BaseRAGStorage):
) )
def _create_default_embedding_function(self): def _create_default_embedding_function(self):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import ( from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction, OpenAIEmbeddingFunction,
) )
@@ -172,3 +196,8 @@ class RAGStorage(BaseRAGStorage):
return OpenAIEmbeddingFunction( return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
) )
except ImportError:
raise ImportError(
"ChromaDB is required for memory storage features. "
"Please install it with 'pip install crewai[storage]'"
)

View File

@@ -1,8 +1,21 @@
import os import os
from typing import Any, Dict, Optional, cast from typing import Any, Dict, Optional, Union, cast, Protocol
try:
from chromadb import Documents, EmbeddingFunction, Embeddings from chromadb import Documents, EmbeddingFunction, Embeddings
from chromadb.api.types import validate_embedding_function from chromadb.api.types import validate_embedding_function
HAS_CHROMADB = True
except ImportError:
HAS_CHROMADB = False
class EmbeddingFunction(Protocol):
def __call__(self, input: Any) -> Any: ...
Documents = Any
Embeddings = Any
def validate_embedding_function(func: Any) -> None:
pass
class EmbeddingConfigurator: class EmbeddingConfigurator:
@@ -26,6 +39,12 @@ class EmbeddingConfigurator:
embedder_config: Optional[Dict[str, Any]] = None, embedder_config: Optional[Dict[str, Any]] = None,
) -> EmbeddingFunction: ) -> EmbeddingFunction:
"""Configures and returns an embedding function based on the provided config.""" """Configures and returns an embedding function based on the provided config."""
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
if embedder_config is None: if embedder_config is None:
return self._create_default_embedding_function() return self._create_default_embedding_function()
@@ -47,6 +66,13 @@ class EmbeddingConfigurator:
@staticmethod @staticmethod
def _create_default_embedding_function(): def _create_default_embedding_function():
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import ( from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction, OpenAIEmbeddingFunction,
) )
@@ -54,9 +80,21 @@ class EmbeddingConfigurator:
return OpenAIEmbeddingFunction( return OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_openai(config, model_name): def _configure_openai(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import ( from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction, OpenAIEmbeddingFunction,
) )
@@ -72,9 +110,21 @@ class EmbeddingConfigurator:
deployment_id=config.get("deployment_id", None), deployment_id=config.get("deployment_id", None),
organization_id=config.get("organization_id", None), organization_id=config.get("organization_id", None),
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_azure(config, model_name): def _configure_azure(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.openai_embedding_function import ( from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction, OpenAIEmbeddingFunction,
) )
@@ -90,9 +140,21 @@ class EmbeddingConfigurator:
deployment_id=config.get("deployment_id"), deployment_id=config.get("deployment_id"),
organization_id=config.get("organization_id"), organization_id=config.get("organization_id"),
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_ollama(config, model_name): def _configure_ollama(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.ollama_embedding_function import ( from chromadb.utils.embedding_functions.ollama_embedding_function import (
OllamaEmbeddingFunction, OllamaEmbeddingFunction,
) )
@@ -101,9 +163,21 @@ class EmbeddingConfigurator:
url=config.get("url", "http://localhost:11434/api/embeddings"), url=config.get("url", "http://localhost:11434/api/embeddings"),
model_name=model_name, model_name=model_name,
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_vertexai(config, model_name): def _configure_vertexai(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.google_embedding_function import ( from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleVertexEmbeddingFunction, GoogleVertexEmbeddingFunction,
) )
@@ -114,9 +188,21 @@ class EmbeddingConfigurator:
project_id=config.get("project_id"), project_id=config.get("project_id"),
region=config.get("region"), region=config.get("region"),
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_google(config, model_name): def _configure_google(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.google_embedding_function import ( from chromadb.utils.embedding_functions.google_embedding_function import (
GoogleGenerativeAiEmbeddingFunction, GoogleGenerativeAiEmbeddingFunction,
) )
@@ -126,9 +212,21 @@ class EmbeddingConfigurator:
api_key=config.get("api_key"), api_key=config.get("api_key"),
task_type=config.get("task_type"), task_type=config.get("task_type"),
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_cohere(config, model_name): def _configure_cohere(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.cohere_embedding_function import ( from chromadb.utils.embedding_functions.cohere_embedding_function import (
CohereEmbeddingFunction, CohereEmbeddingFunction,
) )
@@ -137,9 +235,21 @@ class EmbeddingConfigurator:
model_name=model_name, model_name=model_name,
api_key=config.get("api_key"), api_key=config.get("api_key"),
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_voyageai(config, model_name): def _configure_voyageai(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.voyageai_embedding_function import ( from chromadb.utils.embedding_functions.voyageai_embedding_function import (
VoyageAIEmbeddingFunction, VoyageAIEmbeddingFunction,
) )
@@ -148,9 +258,21 @@ class EmbeddingConfigurator:
model_name=model_name, model_name=model_name,
api_key=config.get("api_key"), api_key=config.get("api_key"),
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_bedrock(config, model_name): def _configure_bedrock(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import ( from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
AmazonBedrockEmbeddingFunction, AmazonBedrockEmbeddingFunction,
) )
@@ -160,9 +282,21 @@ class EmbeddingConfigurator:
if model_name is not None: if model_name is not None:
kwargs["model_name"] = model_name kwargs["model_name"] = model_name
return AmazonBedrockEmbeddingFunction(**kwargs) return AmazonBedrockEmbeddingFunction(**kwargs)
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_huggingface(config, model_name): def _configure_huggingface(config, model_name):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
try:
from chromadb.utils.embedding_functions.huggingface_embedding_function import ( from chromadb.utils.embedding_functions.huggingface_embedding_function import (
HuggingFaceEmbeddingServer, HuggingFaceEmbeddingServer,
) )
@@ -170,6 +304,11 @@ class EmbeddingConfigurator:
return HuggingFaceEmbeddingServer( return HuggingFaceEmbeddingServer(
url=config.get("api_url"), url=config.get("api_url"),
) )
except ImportError:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
@staticmethod @staticmethod
def _configure_watson(config, model_name): def _configure_watson(config, model_name):
@@ -212,6 +351,12 @@ class EmbeddingConfigurator:
@staticmethod @staticmethod
def _configure_custom(config): def _configure_custom(config):
if not HAS_CHROMADB:
raise ImportError(
"ChromaDB is required for embedding functionality. "
"Please install it with 'pip install crewai[storage]'"
)
custom_embedder = config.get("embedder") custom_embedder = config.get("embedder")
if isinstance(custom_embedder, EmbeddingFunction): if isinstance(custom_embedder, EmbeddingFunction):
try: try:

View File

@@ -0,0 +1,74 @@
import pytest
import importlib
import sys
from unittest.mock import patch
def test_import_without_chromadb():
    """Check that crewai imports and builds a basic crew while chromadb is unavailable.

    ``chromadb`` and every already-imported ``chromadb.*`` submodule are masked
    in ``sys.modules`` so importing them raises ``ImportError``. The
    chromadb-dependent crewai modules that were already loaded are reloaded
    under the mask, and reloaded once more afterwards so their
    ``HAS_CHROMADB = False`` state does not leak into other tests.
    """
    dependent_modules = (
        "crewai.memory.storage.rag_storage",
        "crewai.knowledge.storage.knowledge_storage",
        "crewai.utilities.embedding_configurator",
    )
    preloaded = [name for name in dependent_modules if name in sys.modules]

    # A cached "chromadb.<sub>" entry can satisfy `from chromadb.<sub> import ...`
    # even when "chromadb" itself is masked, so mask the cached submodules too.
    masked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    masked["chromadb"] = None

    try:
        with patch.dict(sys.modules, masked):
            for name in preloaded:
                importlib.reload(sys.modules[name])

            import crewai  # noqa: F401 - the import itself is under test
            from crewai import Agent, Crew, Process, Task

            agent = Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")
            # NOTE(review): expected_output is supplied because crewai's Task model
            # appears to require it - confirm against the Task definition.
            task = Task(
                description="Test Task",
                expected_output="Test Output",
                agent=agent,
            )
            crew = Crew(agents=[agent], tasks=[task], process=Process.sequential)

            assert len(crew.agents) == 1
            assert len(crew.tasks) == 1
    finally:
        # importlib.reload() re-executes a module in place, so the modules
        # reloaded above still carry HAS_CHROMADB = False after the patch is
        # undone. Reload them again against the restored sys.modules.
        for name in preloaded:
            importlib.reload(sys.modules[name])
def test_memory_storage_without_chromadb():
    """Check that RAGStorage raises a helpful ImportError when chromadb is unavailable.

    The module is reloaded with chromadb masked out of ``sys.modules`` and is
    reloaded again afterwards so the ``HAS_CHROMADB = False`` state does not
    leak into other tests.
    """
    module_name = "crewai.memory.storage.rag_storage"
    was_loaded = module_name in sys.modules

    # rag_storage's only module-level chromadb import is
    # `from chromadb.api import ClientAPI`. A cached "chromadb.api" entry in
    # sys.modules can satisfy it even when "chromadb" is masked, which would
    # leave HAS_CHROMADB True. Mask every cached chromadb submodule as well.
    masked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    masked["chromadb"] = None

    try:
        with patch.dict(sys.modules, masked):
            if was_loaded:
                importlib.reload(sys.modules[module_name])

            from crewai.memory.storage.rag_storage import HAS_CHROMADB, RAGStorage

            assert not HAS_CHROMADB

            # _initialize_app checks HAS_CHROMADB before touching any instance
            # state, so a bare instance is enough and the test does not depend
            # on the constructor's arguments.
            storage = RAGStorage.__new__(RAGStorage)
            with pytest.raises(ImportError) as excinfo:
                storage._initialize_app()

            assert "ChromaDB is required for memory storage features" in str(excinfo.value)
    finally:
        # The in-place reload above outlives the patch; reload once more so the
        # module sees the real chromadb again.
        if was_loaded:
            importlib.reload(sys.modules[module_name])
def test_knowledge_storage_without_chromadb():
    """Check that KnowledgeStorage raises a helpful ImportError when chromadb is unavailable.

    The module is reloaded with chromadb masked out of ``sys.modules`` and is
    reloaded again afterwards so the ``HAS_CHROMADB = False`` state does not
    leak into other tests.
    """
    module_name = "crewai.knowledge.storage.knowledge_storage"
    was_loaded = module_name in sys.modules

    # Mask the package and every cached submodule so no `chromadb.*` import
    # can be served from the module cache.
    masked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    masked["chromadb"] = None

    try:
        with patch.dict(sys.modules, masked):
            if was_loaded:
                importlib.reload(sys.modules[module_name])

            from crewai.knowledge.storage.knowledge_storage import (
                HAS_CHROMADB,
                KnowledgeStorage,
            )

            assert not HAS_CHROMADB

            # Construction stays inside the raises-block: the error may surface
            # from the constructor or from initialize_knowledge_storage().
            with pytest.raises(ImportError) as excinfo:
                storage = KnowledgeStorage()
                storage.initialize_knowledge_storage()

            assert "ChromaDB is required for knowledge storage features" in str(excinfo.value)
    finally:
        # The in-place reload above outlives the patch; reload once more so the
        # module sees the real chromadb again.
        if was_loaded:
            importlib.reload(sys.modules[module_name])
def test_embedding_configurator_without_chromadb():
    """Check that EmbeddingConfigurator raises a helpful ImportError when chromadb is unavailable.

    The module is reloaded with chromadb masked out of ``sys.modules`` and is
    reloaded again afterwards so the ``HAS_CHROMADB = False`` state does not
    leak into other tests.
    """
    module_name = "crewai.utilities.embedding_configurator"
    was_loaded = module_name in sys.modules

    # Mask the package and every cached submodule so no `chromadb.*` import
    # can be served from the module cache.
    masked = {
        name: None
        for name in list(sys.modules)
        if name == "chromadb" or name.startswith("chromadb.")
    }
    masked["chromadb"] = None

    try:
        with patch.dict(sys.modules, masked):
            if was_loaded:
                importlib.reload(sys.modules[module_name])

            from crewai.utilities.embedding_configurator import (
                HAS_CHROMADB,
                EmbeddingConfigurator,
            )

            assert not HAS_CHROMADB

            with pytest.raises(ImportError) as excinfo:
                configurator = EmbeddingConfigurator()
                configurator.configure_embedder()

            assert "ChromaDB is required for embedding functionality" in str(excinfo.value)
    finally:
        # The in-place reload above outlives the patch; reload once more so the
        # module sees the real chromadb again.
        if was_loaded:
            importlib.reload(sys.modules[module_name])