diff --git a/pyproject.toml b/pyproject.toml index 7528a2ecc..d5b058503 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,6 @@ dependencies = [ "opentelemetry-sdk>=1.30.0", "opentelemetry-exporter-otlp-proto-http>=1.30.0", # Data Handling - "chromadb>=0.5.23", "openpyxl>=3.1.5", "pyvis>=0.3.2", # Authentication and Security @@ -67,6 +66,9 @@ docling = [ aisuite = [ "aisuite>=0.1.10", ] +chromadb = [ + "chromadb>=0.5.23", +] [tool.uv] dev-dependencies = [ diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py index d49cc9876..0e7f935ae 100644 --- a/src/crewai/knowledge/storage/knowledge_storage.py +++ b/src/crewai/knowledge/storage/knowledge_storage.py @@ -6,11 +6,18 @@ import os import shutil from typing import Any, Dict, List, Optional, Union -import chromadb -import chromadb.errors -from chromadb.api import ClientAPI -from chromadb.api.types import OneOrMany -from chromadb.config import Settings +try: + import chromadb + import chromadb.errors + from chromadb.api import ClientAPI + from chromadb.api.types import OneOrMany + from chromadb.config import Settings + Collection = chromadb.Collection +except ImportError: + chromadb = None + ClientAPI = None + OneOrMany = Any + Collection = Any from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage from crewai.utilities import EmbeddingConfigurator @@ -43,7 +50,7 @@ class KnowledgeStorage(BaseKnowledgeStorage): search efficiency. """ - collection: Optional[chromadb.Collection] = None + collection: Optional[Collection] = None collection_name: Optional[str] = "knowledge" app: Optional[ClientAPI] = None @@ -62,6 +69,11 @@ class KnowledgeStorage(BaseKnowledgeStorage): filter: Optional[dict] = None, score_threshold: float = 0.35, ) -> List[Dict[str, Any]]: + if not chromadb: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) + with suppress_logging(): if self.collection: fetched = self.collection.query( @@ -84,6 +96,11 @@ class KnowledgeStorage(BaseKnowledgeStorage): raise Exception("Collection not initialized") def initialize_knowledge_storage(self): + if not chromadb: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) + base_path = os.path.join(db_storage_path(), "knowledge") chroma_client = chromadb.PersistentClient( path=base_path, @@ -109,6 +126,11 @@ class KnowledgeStorage(BaseKnowledgeStorage): raise Exception("Failed to create or get collection") def reset(self): + if not chromadb: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) + base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY) if not self.app: self.app = chromadb.PersistentClient( @@ -126,6 +148,11 @@ class KnowledgeStorage(BaseKnowledgeStorage): documents: List[str], metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None, ): + if not chromadb: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) + if not self.collection: raise Exception("Collection not initialized") @@ -181,13 +208,23 @@ class KnowledgeStorage(BaseKnowledgeStorage): raise def _create_default_embedding_function(self): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, - ) + if not chromadb: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) + + try: + from chromadb.utils.embedding_functions.openai_embedding_function import ( + OpenAIEmbeddingFunction, + ) - return OpenAIEmbeddingFunction( - api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" - ) + return OpenAIEmbeddingFunction( + api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None: """Set the embedding configuration for the knowledge storage. diff --git a/src/crewai/memory/storage/rag_storage.py b/src/crewai/memory/storage/rag_storage.py index fd4c77838..4499c50c3 100644 --- a/src/crewai/memory/storage/rag_storage.py +++ b/src/crewai/memory/storage/rag_storage.py @@ -6,7 +6,13 @@ import shutil import uuid from typing import Any, Dict, List, Optional -from chromadb.api import ClientAPI +try: + from chromadb.api import ClientAPI + import chromadb + Collection = chromadb.Collection +except ImportError: + ClientAPI = None + Collection = Any from crewai.memory.storage.base_rag_storage import BaseRAGStorage from crewai.utilities import EmbeddingConfigurator @@ -37,7 +43,8 @@ class RAGStorage(BaseRAGStorage): search efficiency. """ - app: ClientAPI | None = None + app: Optional[ClientAPI] = None + collection: Optional[Collection] = None def __init__( self, type, allow_reset=True, embedder_config=None, crew=None, path=None @@ -60,8 +67,13 @@ class RAGStorage(BaseRAGStorage): self.embedder_config = configurator.configure_embedder(self.embedder_config) def _initialize_app(self): - import chromadb - from chromadb.config import Settings + try: + import chromadb + from chromadb.config import Settings + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) self._set_embedder_config() chroma_client = chromadb.PersistentClient( diff --git a/src/crewai/utilities/embedding_configurator.py b/src/crewai/utilities/embedding_configurator.py index e523b60f0..fabdd8d70 100644 --- a/src/crewai/utilities/embedding_configurator.py +++ b/src/crewai/utilities/embedding_configurator.py @@ -1,8 +1,18 @@ import os -from typing import Any, Dict, Optional, cast +from typing import Any, Dict, Optional, Union, cast -from chromadb import Documents, EmbeddingFunction, Embeddings -from chromadb.api.types import validate_embedding_function +Documents = Union[str, list[str]] +Embeddings = list[list[float]] + +try: + from chromadb import EmbeddingFunction + from chromadb.api.types import validate_embedding_function +except ImportError: + class EmbeddingFunction: + def __call__(self, input: Documents) -> Embeddings: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) class EmbeddingConfigurator: @@ -47,190 +57,250 @@ class EmbeddingConfigurator: @staticmethod def _create_default_embedding_function(): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.openai_embedding_function import ( + OpenAIEmbeddingFunction, + ) - return OpenAIEmbeddingFunction( - api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" - ) + return OpenAIEmbeddingFunction( + api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_openai(config, model_name): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.openai_embedding_function import ( + OpenAIEmbeddingFunction, + ) - return OpenAIEmbeddingFunction( - api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"), - model_name=model_name, - api_base=config.get("api_base", None), - api_type=config.get("api_type", None), - api_version=config.get("api_version", None), - default_headers=config.get("default_headers", None), - dimensions=config.get("dimensions", None), - deployment_id=config.get("deployment_id", None), - organization_id=config.get("organization_id", None), - ) + return OpenAIEmbeddingFunction( + api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"), + model_name=model_name, + api_base=config.get("api_base", None), + api_type=config.get("api_type", None), + api_version=config.get("api_version", None), + default_headers=config.get("default_headers", None), + dimensions=config.get("dimensions", None), + deployment_id=config.get("deployment_id", None), + organization_id=config.get("organization_id", None), + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_azure(config, model_name): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.openai_embedding_function import ( + OpenAIEmbeddingFunction, + ) - return OpenAIEmbeddingFunction( - api_key=config.get("api_key"), - api_base=config.get("api_base"), - api_type=config.get("api_type", "azure"), - api_version=config.get("api_version"), - model_name=model_name, - default_headers=config.get("default_headers"), - dimensions=config.get("dimensions"), - deployment_id=config.get("deployment_id"), - organization_id=config.get("organization_id"), - ) + return OpenAIEmbeddingFunction( + api_key=config.get("api_key"), + api_base=config.get("api_base"), + api_type=config.get("api_type", "azure"), + api_version=config.get("api_version"), + model_name=model_name, + default_headers=config.get("default_headers"), + dimensions=config.get("dimensions"), + deployment_id=config.get("deployment_id"), + organization_id=config.get("organization_id"), + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_ollama(config, model_name): - from chromadb.utils.embedding_functions.ollama_embedding_function import ( - OllamaEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.ollama_embedding_function import ( + OllamaEmbeddingFunction, + ) - return OllamaEmbeddingFunction( - url=config.get("url", "http://localhost:11434/api/embeddings"), - model_name=model_name, - ) + return OllamaEmbeddingFunction( + url=config.get("url", "http://localhost:11434/api/embeddings"), + model_name=model_name, + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_vertexai(config, model_name): - from chromadb.utils.embedding_functions.google_embedding_function import ( - GoogleVertexEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.google_embedding_function import ( + GoogleVertexEmbeddingFunction, + ) - return GoogleVertexEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - project_id=config.get("project_id"), - region=config.get("region"), - ) + return GoogleVertexEmbeddingFunction( + model_name=model_name, + api_key=config.get("api_key"), + project_id=config.get("project_id"), + region=config.get("region"), + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_google(config, model_name): - from chromadb.utils.embedding_functions.google_embedding_function import ( - GoogleGenerativeAiEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.google_embedding_function import ( + GoogleGenerativeAiEmbeddingFunction, + ) - return GoogleGenerativeAiEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - task_type=config.get("task_type"), - ) + return GoogleGenerativeAiEmbeddingFunction( + model_name=model_name, + api_key=config.get("api_key"), + task_type=config.get("task_type"), + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_cohere(config, model_name): - from chromadb.utils.embedding_functions.cohere_embedding_function import ( - CohereEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.cohere_embedding_function import ( + CohereEmbeddingFunction, + ) - return CohereEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - ) + return CohereEmbeddingFunction( + model_name=model_name, + api_key=config.get("api_key"), + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_voyageai(config, model_name): - from chromadb.utils.embedding_functions.voyageai_embedding_function import ( - VoyageAIEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.voyageai_embedding_function import ( + VoyageAIEmbeddingFunction, + ) - return VoyageAIEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - ) + return VoyageAIEmbeddingFunction( + model_name=model_name, + api_key=config.get("api_key"), + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_bedrock(config, model_name): - from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import ( - AmazonBedrockEmbeddingFunction, - ) + try: + from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import ( + AmazonBedrockEmbeddingFunction, + ) - # Allow custom model_name override with backwards compatibility - kwargs = {"session": config.get("session")} - if model_name is not None: - kwargs["model_name"] = model_name - return AmazonBedrockEmbeddingFunction(**kwargs) + # Allow custom model_name override with backwards compatibility + kwargs = {"session": config.get("session")} + if model_name is not None: + kwargs["model_name"] = model_name + return AmazonBedrockEmbeddingFunction(**kwargs) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_huggingface(config, model_name): - from chromadb.utils.embedding_functions.huggingface_embedding_function import ( - HuggingFaceEmbeddingServer, - ) + try: + from chromadb.utils.embedding_functions.huggingface_embedding_function import ( + HuggingFaceEmbeddingServer, + ) - return HuggingFaceEmbeddingServer( - url=config.get("api_url"), - ) + return HuggingFaceEmbeddingServer( + url=config.get("api_url"), + ) + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_watson(config, model_name): try: - import ibm_watsonx_ai.foundation_models as watson_models - from ibm_watsonx_ai import Credentials - from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames as EmbedParams - except ImportError as e: + try: + import ibm_watsonx_ai.foundation_models as watson_models + from ibm_watsonx_ai import Credentials + from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames as EmbedParams + except ImportError as e: + raise ImportError( + "IBM Watson dependencies are not installed. Please install them to use Watson embedding." + ) from e + + class WatsonEmbeddingFunction(EmbeddingFunction): + def __call__(self, input: Documents) -> Embeddings: + if isinstance(input, str): + input = [input] + + embed_params = { + EmbedParams.TRUNCATE_INPUT_TOKENS: 3, + EmbedParams.RETURN_OPTIONS: {"input_text": True}, + } + + embedding = watson_models.Embeddings( + model_id=config.get("model"), + params=embed_params, + credentials=Credentials( + api_key=config.get("api_key"), url=config.get("api_url") + ), + project_id=config.get("project_id"), + ) + + try: + embeddings = embedding.embed_documents(input) + return cast(Embeddings, embeddings) + except Exception as e: + print("Error during Watson embedding:", e) + raise e + + return WatsonEmbeddingFunction() + except ImportError: raise ImportError( - "IBM Watson dependencies are not installed. Please install them to use Watson embedding." - ) from e - - class WatsonEmbeddingFunction(EmbeddingFunction): - def __call__(self, input: Documents) -> Embeddings: - if isinstance(input, str): - input = [input] - - embed_params = { - EmbedParams.TRUNCATE_INPUT_TOKENS: 3, - EmbedParams.RETURN_OPTIONS: {"input_text": True}, - } - - embedding = watson_models.Embeddings( - model_id=config.get("model"), - params=embed_params, - credentials=Credentials( - api_key=config.get("api_key"), url=config.get("api_url") - ), - project_id=config.get("project_id"), - ) - - try: - embeddings = embedding.embed_documents(input) - return cast(Embeddings, embeddings) - except Exception as e: - print("Error during Watson embedding:", e) - raise e - - return WatsonEmbeddingFunction() + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." + ) @staticmethod def _configure_custom(config): - custom_embedder = config.get("embedder") - if isinstance(custom_embedder, EmbeddingFunction): - try: - validate_embedding_function(custom_embedder) - return custom_embedder - except Exception as e: - raise ValueError(f"Invalid custom embedding function: {str(e)}") - elif callable(custom_embedder): - try: - instance = custom_embedder() - if isinstance(instance, EmbeddingFunction): - validate_embedding_function(instance) - return instance + try: + custom_embedder = config.get("embedder") + if isinstance(custom_embedder, EmbeddingFunction): + try: + validate_embedding_function(custom_embedder) + return custom_embedder + except Exception as e: + raise ValueError(f"Invalid custom embedding function: {str(e)}") + elif callable(custom_embedder): + try: + instance = custom_embedder() + if isinstance(instance, EmbeddingFunction): + validate_embedding_function(instance) + return instance + raise ValueError( + "Custom embedder does not create an EmbeddingFunction instance" + ) + except Exception as e: + raise ValueError(f"Error instantiating custom embedder: {str(e)}") + else: raise ValueError( - "Custom embedder does not create an EmbeddingFunction instance" + "Custom embedder must be an instance of `EmbeddingFunction` or a callable that creates one" ) - except Exception as e: - raise ValueError(f"Error instantiating custom embedder: {str(e)}") - else: - raise ValueError( - "Custom embedder must be an instance of `EmbeddingFunction` or a callable that creates one" + except ImportError: + raise ImportError( + "ChromaDB is not installed. Please install it with `pip install crewai[chromadb]`." ) diff --git a/tests/memory/long_term_memory_test.py b/tests/memory/long_term_memory_test.py index 3639054e3..29d07f175 100644 --- a/tests/memory/long_term_memory_test.py +++ b/tests/memory/long_term_memory_test.py @@ -1,5 +1,11 @@ import pytest +chromadb_not_installed = False +try: + import chromadb +except ImportError: + chromadb_not_installed = True + from crewai.memory.long_term.long_term_memory import LongTermMemory from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem @@ -10,6 +16,7 @@ def long_term_memory(): return LongTermMemory() +@pytest.mark.skipif(chromadb_not_installed, reason="ChromaDB is not installed") def test_save_and_search(long_term_memory): memory = LongTermMemoryItem( agent="test_agent", diff --git a/tests/memory/short_term_memory_test.py b/tests/memory/short_term_memory_test.py index 6cde2a044..8e8710aee 100644 --- a/tests/memory/short_term_memory_test.py +++ b/tests/memory/short_term_memory_test.py @@ -2,6 +2,12 @@ from unittest.mock import patch import pytest +chromadb_not_installed = False +try: + import chromadb +except ImportError: + chromadb_not_installed = True + from crewai.agent import Agent from crewai.crew import Crew from crewai.memory.short_term.short_term_memory import ShortTermMemory @@ -28,6 +34,7 @@ def short_term_memory(): return ShortTermMemory(crew=Crew(agents=[agent], tasks=[task])) +@pytest.mark.skipif(chromadb_not_installed, reason="ChromaDB is not installed") def test_save_and_search(short_term_memory): memory = ShortTermMemoryItem( data="""test value test value test value test value test value test value diff --git a/tests/storage/test_optional_chromadb.py b/tests/storage/test_optional_chromadb.py new file mode 100644 index 000000000..bb99f9279 --- /dev/null +++ b/tests/storage/test_optional_chromadb.py @@ -0,0 +1,25 @@ +import unittest +from unittest.mock import patch, MagicMock +import sys +import pytest +from typing import Any, Dict, List, Optional + + +class TestOptionalChromadb(unittest.TestCase): + def test_rag_storage_import_error(self): + """Test that RAGStorage raises an ImportError when chromadb is not installed.""" + with patch.dict(sys.modules, {"chromadb": None}): + with pytest.raises(ImportError) as excinfo: + from crewai.memory.storage.rag_storage import RAGStorage + storage = RAGStorage(type="test") + + assert "ChromaDB is not installed" in str(excinfo.value) + + def test_knowledge_storage_import_error(self): + """Test that KnowledgeStorage raises an ImportError when chromadb is not installed.""" + with patch.dict(sys.modules, {"chromadb": None}): + with pytest.raises(ImportError) as excinfo: + from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage + storage = KnowledgeStorage() + + assert "ChromaDB is not installed" in str(excinfo.value)