mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-25 16:18:13 +00:00
Compare commits
4 Commits
devin/1768
...
devin/1742
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9fa65f724f | ||
|
|
13e1aa96de | ||
|
|
7fb76bb858 | ||
|
|
486cf58c3b |
@@ -4,13 +4,34 @@ import io
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import warnings
|
||||||
from typing import Any, Dict, List, Optional, Union, cast
|
from typing import Any, Dict, List, Optional, Union, cast
|
||||||
|
|
||||||
import chromadb
|
# Initialize module import status
|
||||||
import chromadb.errors
|
CHROMADB_AVAILABLE = False
|
||||||
from chromadb.api import ClientAPI
|
|
||||||
from chromadb.api.types import OneOrMany
|
# Define placeholder types
|
||||||
from chromadb.config import Settings
|
class DummyClientAPI:
|
||||||
|
pass
|
||||||
|
|
||||||
|
class DummySettings:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Try to import chromadb-related modules with proper error handling
|
||||||
|
try:
|
||||||
|
import chromadb
|
||||||
|
import chromadb.errors
|
||||||
|
from chromadb.api import ClientAPI
|
||||||
|
from chromadb.api.types import OneOrMany
|
||||||
|
from chromadb.config import Settings
|
||||||
|
CHROMADB_AVAILABLE = True
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import chromadb: {str(e)}. Knowledge functionality will be limited.")
|
||||||
|
# Use dummy classes when imports fail
|
||||||
|
chromadb = None
|
||||||
|
ClientAPI = DummyClientAPI
|
||||||
|
OneOrMany = Any
|
||||||
|
Settings = DummySettings
|
||||||
|
|
||||||
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
|
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
|
||||||
from crewai.utilities import EmbeddingConfigurator
|
from crewai.utilities import EmbeddingConfigurator
|
||||||
@@ -42,9 +63,9 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
|||||||
search efficiency.
|
search efficiency.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
collection: Optional[chromadb.Collection] = None
|
collection = None # Type annotation removed to handle case when chromadb is not available
|
||||||
collection_name: Optional[str] = "knowledge"
|
collection_name: Optional[str] = "knowledge"
|
||||||
app: Optional[ClientAPI] = None
|
app = None # Type annotation removed to handle case when chromadb is not available
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -61,37 +82,52 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
|||||||
filter: Optional[dict] = None,
|
filter: Optional[dict] = None,
|
||||||
score_threshold: float = 0.35,
|
score_threshold: float = 0.35,
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
|
if not CHROMADB_AVAILABLE:
|
||||||
|
logging.warning("Cannot search knowledge as chromadb is not available.")
|
||||||
|
return []
|
||||||
|
|
||||||
with suppress_logging():
|
with suppress_logging():
|
||||||
if self.collection:
|
if self.collection:
|
||||||
fetched = self.collection.query(
|
try:
|
||||||
query_texts=query,
|
fetched = self.collection.query(
|
||||||
n_results=limit,
|
query_texts=query,
|
||||||
where=filter,
|
n_results=limit,
|
||||||
)
|
where=filter,
|
||||||
results = []
|
)
|
||||||
for i in range(len(fetched["ids"][0])): # type: ignore
|
results = []
|
||||||
result = {
|
for i in range(len(fetched["ids"][0])): # type: ignore
|
||||||
"id": fetched["ids"][0][i], # type: ignore
|
result = {
|
||||||
"metadata": fetched["metadatas"][0][i], # type: ignore
|
"id": fetched["ids"][0][i], # type: ignore
|
||||||
"context": fetched["documents"][0][i], # type: ignore
|
"metadata": fetched["metadatas"][0][i], # type: ignore
|
||||||
"score": fetched["distances"][0][i], # type: ignore
|
"context": fetched["documents"][0][i], # type: ignore
|
||||||
}
|
"score": fetched["distances"][0][i], # type: ignore
|
||||||
if result["score"] >= score_threshold:
|
}
|
||||||
results.append(result)
|
if result["score"] >= score_threshold:
|
||||||
return results
|
results.append(result)
|
||||||
|
return results
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error during knowledge search: {str(e)}")
|
||||||
|
return []
|
||||||
else:
|
else:
|
||||||
raise Exception("Collection not initialized")
|
logging.warning("Collection not initialized")
|
||||||
|
return []
|
||||||
|
|
||||||
def initialize_knowledge_storage(self):
|
def initialize_knowledge_storage(self):
|
||||||
base_path = os.path.join(db_storage_path(), "knowledge")
|
if not CHROMADB_AVAILABLE:
|
||||||
chroma_client = chromadb.PersistentClient(
|
logging.warning("Cannot initialize knowledge storage as chromadb is not available.")
|
||||||
path=base_path,
|
self.app = None
|
||||||
settings=Settings(allow_reset=True),
|
self.collection = None
|
||||||
)
|
return
|
||||||
|
|
||||||
self.app = chroma_client
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
base_path = os.path.join(db_storage_path(), "knowledge")
|
||||||
|
chroma_client = chromadb.PersistentClient(
|
||||||
|
path=base_path,
|
||||||
|
settings=Settings(allow_reset=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.app = chroma_client
|
||||||
|
|
||||||
collection_name = (
|
collection_name = (
|
||||||
f"knowledge_{self.collection_name}"
|
f"knowledge_{self.collection_name}"
|
||||||
if self.collection_name
|
if self.collection_name
|
||||||
@@ -102,30 +138,46 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
|||||||
name=collection_name, embedding_function=self.embedder
|
name=collection_name, embedding_function=self.embedder
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise Exception("Vector Database Client not initialized")
|
logging.warning("Vector Database Client not initialized")
|
||||||
except Exception:
|
self.collection = None
|
||||||
raise Exception("Failed to create or get collection")
|
except Exception as e:
|
||||||
|
logging.error(f"Failed to create or get collection: {str(e)}")
|
||||||
|
self.app = None
|
||||||
|
self.collection = None
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY)
|
if not CHROMADB_AVAILABLE:
|
||||||
if not self.app:
|
logging.warning("Cannot reset knowledge storage as chromadb is not available.")
|
||||||
self.app = chromadb.PersistentClient(
|
return
|
||||||
path=base_path,
|
|
||||||
settings=Settings(allow_reset=True),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.app.reset()
|
try:
|
||||||
shutil.rmtree(base_path)
|
base_path = os.path.join(db_storage_path(), KNOWLEDGE_DIRECTORY)
|
||||||
self.app = None
|
if not self.app:
|
||||||
self.collection = None
|
self.app = chromadb.PersistentClient(
|
||||||
|
path=base_path,
|
||||||
|
settings=Settings(allow_reset=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.app.reset()
|
||||||
|
shutil.rmtree(base_path)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"Error during knowledge reset: {str(e)}")
|
||||||
|
finally:
|
||||||
|
self.app = None
|
||||||
|
self.collection = None
|
||||||
|
|
||||||
def save(
|
def save(
|
||||||
self,
|
self,
|
||||||
documents: List[str],
|
documents: List[str],
|
||||||
metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
|
metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
|
||||||
):
|
):
|
||||||
|
if not CHROMADB_AVAILABLE:
|
||||||
|
logging.warning("Cannot save to knowledge storage as chromadb is not available.")
|
||||||
|
return
|
||||||
|
|
||||||
if not self.collection:
|
if not self.collection:
|
||||||
raise Exception("Collection not initialized")
|
logging.warning("Collection not initialized")
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Create a dictionary to store unique documents
|
# Create a dictionary to store unique documents
|
||||||
@@ -154,38 +206,46 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
|||||||
filtered_ids.append(doc_id)
|
filtered_ids.append(doc_id)
|
||||||
|
|
||||||
# If we have no metadata at all, set it to None
|
# If we have no metadata at all, set it to None
|
||||||
final_metadata: Optional[OneOrMany[chromadb.Metadata]] = (
|
final_metadata = None
|
||||||
None if all(m is None for m in filtered_metadata) else filtered_metadata
|
if not all(m is None for m in filtered_metadata):
|
||||||
)
|
final_metadata = filtered_metadata
|
||||||
|
|
||||||
self.collection.upsert(
|
self.collection.upsert(
|
||||||
documents=filtered_docs,
|
documents=filtered_docs,
|
||||||
metadatas=final_metadata,
|
metadatas=final_metadata,
|
||||||
ids=filtered_ids,
|
ids=filtered_ids,
|
||||||
)
|
)
|
||||||
except chromadb.errors.InvalidDimensionException as e:
|
|
||||||
Logger(verbose=True).log(
|
|
||||||
"error",
|
|
||||||
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
|
|
||||||
"red",
|
|
||||||
)
|
|
||||||
raise ValueError(
|
|
||||||
"Embedding dimension mismatch. Make sure you're using the same embedding model "
|
|
||||||
"across all operations with this collection."
|
|
||||||
"Try resetting the collection using `crewai reset-memories -a`"
|
|
||||||
) from e
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red")
|
if hasattr(chromadb, 'errors') and isinstance(e, chromadb.errors.InvalidDimensionException):
|
||||||
raise
|
Logger(verbose=True).log(
|
||||||
|
"error",
|
||||||
|
"Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`",
|
||||||
|
"red",
|
||||||
|
)
|
||||||
|
logging.error(
|
||||||
|
"Embedding dimension mismatch. Make sure you're using the same embedding model "
|
||||||
|
"across all operations with this collection."
|
||||||
|
"Try resetting the collection using `crewai reset-memories -a`"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
Logger(verbose=True).log("error", f"Failed to upsert documents: {e}", "red")
|
||||||
|
logging.error(f"Failed to upsert documents: {e}")
|
||||||
|
|
||||||
def _create_default_embedding_function(self):
|
def _create_default_embedding_function(self):
|
||||||
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
OpenAIEmbeddingFunction,
|
return None
|
||||||
)
|
|
||||||
|
|
||||||
return OpenAIEmbeddingFunction(
|
try:
|
||||||
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
|
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
||||||
)
|
OpenAIEmbeddingFunction,
|
||||||
|
)
|
||||||
|
|
||||||
|
return OpenAIEmbeddingFunction(
|
||||||
|
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
logging.warning(f"Failed to create default embedding function: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None:
|
def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None:
|
||||||
"""Set the embedding configuration for the knowledge storage.
|
"""Set the embedding configuration for the knowledge storage.
|
||||||
@@ -194,8 +254,12 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
|||||||
embedder_config (Optional[Dict[str, Any]]): Configuration dictionary for the embedder.
|
embedder_config (Optional[Dict[str, Any]]): Configuration dictionary for the embedder.
|
||||||
If None or empty, defaults to the default embedding function.
|
If None or empty, defaults to the default embedding function.
|
||||||
"""
|
"""
|
||||||
self.embedder = (
|
try:
|
||||||
EmbeddingConfigurator().configure_embedder(embedder)
|
self.embedder = (
|
||||||
if embedder
|
EmbeddingConfigurator().configure_embedder(embedder)
|
||||||
else self._create_default_embedding_function()
|
if embedder
|
||||||
)
|
else self._create_default_embedding_function()
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Failed to configure embedder: {str(e)}")
|
||||||
|
self.embedder = None
|
||||||
|
|||||||
@@ -60,26 +60,32 @@ class RAGStorage(BaseRAGStorage):
|
|||||||
self.embedder_config = configurator.configure_embedder(self.embedder_config)
|
self.embedder_config = configurator.configure_embedder(self.embedder_config)
|
||||||
|
|
||||||
def _initialize_app(self):
|
def _initialize_app(self):
|
||||||
import chromadb
|
|
||||||
from chromadb.config import Settings
|
|
||||||
|
|
||||||
self._set_embedder_config()
|
|
||||||
chroma_client = chromadb.PersistentClient(
|
|
||||||
path=self.path if self.path else self.storage_file_name,
|
|
||||||
settings=Settings(allow_reset=self.allow_reset),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.app = chroma_client
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.collection = self.app.get_collection(
|
import chromadb
|
||||||
name=self.type, embedding_function=self.embedder_config
|
from chromadb.config import Settings
|
||||||
)
|
|
||||||
except Exception:
|
self._set_embedder_config()
|
||||||
self.collection = self.app.create_collection(
|
chroma_client = chromadb.PersistentClient(
|
||||||
name=self.type, embedding_function=self.embedder_config
|
path=self.path if self.path else self.storage_file_name,
|
||||||
|
settings=Settings(allow_reset=self.allow_reset),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.app = chroma_client
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.collection = self.app.get_collection(
|
||||||
|
name=self.type, embedding_function=self.embedder_config
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
self.collection = self.app.create_collection(
|
||||||
|
name=self.type, embedding_function=self.embedder_config
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
import logging
|
||||||
|
logging.warning(f"Failed to initialize chromadb: {str(e)}. Memory functionality will be limited.")
|
||||||
|
self.app = None
|
||||||
|
self.collection = None
|
||||||
|
|
||||||
def _sanitize_role(self, role: str) -> str:
|
def _sanitize_role(self, role: str) -> str:
|
||||||
"""
|
"""
|
||||||
Sanitizes agent roles to ensure valid directory names.
|
Sanitizes agent roles to ensure valid directory names.
|
||||||
@@ -103,6 +109,9 @@ class RAGStorage(BaseRAGStorage):
|
|||||||
def save(self, value: Any, metadata: Dict[str, Any]) -> None:
|
def save(self, value: Any, metadata: Dict[str, Any]) -> None:
|
||||||
if not hasattr(self, "app") or not hasattr(self, "collection"):
|
if not hasattr(self, "app") or not hasattr(self, "collection"):
|
||||||
self._initialize_app()
|
self._initialize_app()
|
||||||
|
if self.app is None or self.collection is None:
|
||||||
|
logging.warning("Cannot save to memory as chromadb is not available.")
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
self._generate_embedding(value, metadata)
|
self._generate_embedding(value, metadata)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -115,9 +124,13 @@ class RAGStorage(BaseRAGStorage):
|
|||||||
filter: Optional[dict] = None,
|
filter: Optional[dict] = None,
|
||||||
score_threshold: float = 0.35,
|
score_threshold: float = 0.35,
|
||||||
) -> List[Any]:
|
) -> List[Any]:
|
||||||
if not hasattr(self, "app"):
|
if not hasattr(self, "app") or not hasattr(self, "collection"):
|
||||||
self._initialize_app()
|
self._initialize_app()
|
||||||
|
|
||||||
|
if self.app is None or self.collection is None:
|
||||||
|
logging.warning("Cannot search memory as chromadb is not available.")
|
||||||
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with suppress_logging():
|
with suppress_logging():
|
||||||
response = self.collection.query(query_texts=query, n_results=limit)
|
response = self.collection.query(query_texts=query, n_results=limit)
|
||||||
@@ -142,6 +155,10 @@ class RAGStorage(BaseRAGStorage):
|
|||||||
if not hasattr(self, "app") or not hasattr(self, "collection"):
|
if not hasattr(self, "app") or not hasattr(self, "collection"):
|
||||||
self._initialize_app()
|
self._initialize_app()
|
||||||
|
|
||||||
|
if self.app is None or self.collection is None:
|
||||||
|
logging.warning("Cannot generate embeddings as chromadb is not available.")
|
||||||
|
return
|
||||||
|
|
||||||
self.collection.add(
|
self.collection.add(
|
||||||
documents=[text],
|
documents=[text],
|
||||||
metadatas=[metadata or {}],
|
metadatas=[metadata or {}],
|
||||||
@@ -160,15 +177,7 @@ class RAGStorage(BaseRAGStorage):
|
|||||||
# Ignore this specific error
|
# Ignore this specific error
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise Exception(
|
logging.error(f"An error occurred while resetting the {self.type} memory: {e}")
|
||||||
f"An error occurred while resetting the {self.type} memory: {e}"
|
# Don't raise exception to prevent crashes
|
||||||
)
|
self.app = None
|
||||||
|
self.collection = None
|
||||||
def _create_default_embedding_function(self):
|
|
||||||
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
|
||||||
OpenAIEmbeddingFunction,
|
|
||||||
)
|
|
||||||
|
|
||||||
return OpenAIEmbeddingFunction(
|
|
||||||
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -1,8 +1,40 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Any, Dict, Optional, cast
|
import warnings
|
||||||
|
from typing import Any, Callable, Dict, List, Optional, Union, cast
|
||||||
|
|
||||||
from chromadb import Documents, EmbeddingFunction, Embeddings
|
# Initialize with None to indicate module import status
|
||||||
from chromadb.api.types import validate_embedding_function
|
CHROMADB_AVAILABLE = False
|
||||||
|
|
||||||
|
# Define placeholder types for when chromadb is not available
|
||||||
|
class EmbeddingFunction:
|
||||||
|
def __call__(self, texts):
|
||||||
|
raise NotImplementedError("Chromadb is not available")
|
||||||
|
|
||||||
|
Documents = List[str]
|
||||||
|
Embeddings = List[List[float]]
|
||||||
|
|
||||||
|
def validate_embedding_function(func):
|
||||||
|
return func
|
||||||
|
|
||||||
|
# Try to import chromadb-related modules with proper error handling
|
||||||
|
try:
|
||||||
|
from chromadb.api.types import Documents as ChromaDocuments
|
||||||
|
from chromadb.api.types import EmbeddingFunction as ChromaEmbeddingFunction
|
||||||
|
from chromadb.api.types import Embeddings as ChromaEmbeddings
|
||||||
|
from chromadb.utils import (
|
||||||
|
validate_embedding_function as chroma_validate_embedding_function,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Override our placeholder types with the real ones
|
||||||
|
Documents = ChromaDocuments
|
||||||
|
EmbeddingFunction = ChromaEmbeddingFunction
|
||||||
|
Embeddings = ChromaEmbeddings
|
||||||
|
validate_embedding_function = chroma_validate_embedding_function
|
||||||
|
|
||||||
|
CHROMADB_AVAILABLE = True
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
# This captures both ImportError and AttributeError (which can happen with NumPy 2.x)
|
||||||
|
warnings.warn(f"Failed to import chromadb: {str(e)}. Embedding functionality will be limited.")
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingConfigurator:
|
class EmbeddingConfigurator:
|
||||||
@@ -26,6 +58,9 @@ class EmbeddingConfigurator:
|
|||||||
embedder_config: Optional[Dict[str, Any]] = None,
|
embedder_config: Optional[Dict[str, Any]] = None,
|
||||||
) -> EmbeddingFunction:
|
) -> EmbeddingFunction:
|
||||||
"""Configures and returns an embedding function based on the provided config."""
|
"""Configures and returns an embedding function based on the provided config."""
|
||||||
|
if not CHROMADB_AVAILABLE:
|
||||||
|
return self._create_unavailable_embedding_function()
|
||||||
|
|
||||||
if embedder_config is None:
|
if embedder_config is None:
|
||||||
return self._create_default_embedding_function()
|
return self._create_default_embedding_function()
|
||||||
|
|
||||||
@@ -46,141 +81,228 @@ class EmbeddingConfigurator:
|
|||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _create_default_embedding_function():
|
def _create_unavailable_embedding_function():
|
||||||
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
"""Creates a fallback embedding function when chromadb is not available."""
|
||||||
OpenAIEmbeddingFunction,
|
class UnavailableEmbeddingFunction(EmbeddingFunction):
|
||||||
)
|
def __call__(self, input):
|
||||||
|
raise ImportError(
|
||||||
|
"Chromadb is not available due to NumPy compatibility issues. "
|
||||||
|
"Either downgrade to NumPy<2 or upgrade chromadb and related dependencies."
|
||||||
|
)
|
||||||
|
|
||||||
return OpenAIEmbeddingFunction(
|
return UnavailableEmbeddingFunction()
|
||||||
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
|
|
||||||
)
|
@staticmethod
|
||||||
|
def _create_default_embedding_function():
|
||||||
|
if not CHROMADB_AVAILABLE:
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
||||||
|
OpenAIEmbeddingFunction,
|
||||||
|
)
|
||||||
|
|
||||||
|
return OpenAIEmbeddingFunction(
|
||||||
|
api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
import warnings
|
||||||
|
warnings.warn(f"Failed to import OpenAIEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_openai(config, model_name):
|
def _configure_openai(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
OpenAIEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return OpenAIEmbeddingFunction(
|
try:
|
||||||
api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"),
|
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
||||||
model_name=model_name,
|
OpenAIEmbeddingFunction,
|
||||||
api_base=config.get("api_base", None),
|
)
|
||||||
api_type=config.get("api_type", None),
|
|
||||||
api_version=config.get("api_version", None),
|
return OpenAIEmbeddingFunction(
|
||||||
default_headers=config.get("default_headers", None),
|
api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"),
|
||||||
dimensions=config.get("dimensions", None),
|
model_name=model_name,
|
||||||
deployment_id=config.get("deployment_id", None),
|
api_base=config.get("api_base", None),
|
||||||
organization_id=config.get("organization_id", None),
|
api_type=config.get("api_type", None),
|
||||||
)
|
api_version=config.get("api_version", None),
|
||||||
|
default_headers=config.get("default_headers", None),
|
||||||
|
dimensions=config.get("dimensions", None),
|
||||||
|
deployment_id=config.get("deployment_id", None),
|
||||||
|
organization_id=config.get("organization_id", None),
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import OpenAIEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_azure(config, model_name):
|
def _configure_azure(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
OpenAIEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return OpenAIEmbeddingFunction(
|
try:
|
||||||
api_key=config.get("api_key"),
|
from chromadb.utils.embedding_functions.openai_embedding_function import (
|
||||||
api_base=config.get("api_base"),
|
OpenAIEmbeddingFunction,
|
||||||
api_type=config.get("api_type", "azure"),
|
)
|
||||||
api_version=config.get("api_version"),
|
|
||||||
model_name=model_name,
|
return OpenAIEmbeddingFunction(
|
||||||
default_headers=config.get("default_headers"),
|
api_key=config.get("api_key"),
|
||||||
dimensions=config.get("dimensions"),
|
api_base=config.get("api_base"),
|
||||||
deployment_id=config.get("deployment_id"),
|
api_type=config.get("api_type", "azure"),
|
||||||
organization_id=config.get("organization_id"),
|
api_version=config.get("api_version"),
|
||||||
)
|
model_name=model_name,
|
||||||
|
default_headers=config.get("default_headers"),
|
||||||
|
dimensions=config.get("dimensions"),
|
||||||
|
deployment_id=config.get("deployment_id"),
|
||||||
|
organization_id=config.get("organization_id"),
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import OpenAIEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_ollama(config, model_name):
|
def _configure_ollama(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.ollama_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
OllamaEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return OllamaEmbeddingFunction(
|
try:
|
||||||
url=config.get("url", "http://localhost:11434/api/embeddings"),
|
from chromadb.utils.embedding_functions.ollama_embedding_function import (
|
||||||
model_name=model_name,
|
OllamaEmbeddingFunction,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
return OllamaEmbeddingFunction(
|
||||||
|
url=config.get("url", "http://localhost:11434/api/embeddings"),
|
||||||
|
model_name=model_name,
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import OllamaEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_vertexai(config, model_name):
|
def _configure_vertexai(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.google_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
GoogleVertexEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return GoogleVertexEmbeddingFunction(
|
try:
|
||||||
model_name=model_name,
|
from chromadb.utils.embedding_functions.google_embedding_function import (
|
||||||
api_key=config.get("api_key"),
|
GoogleVertexEmbeddingFunction,
|
||||||
project_id=config.get("project_id"),
|
)
|
||||||
region=config.get("region"),
|
|
||||||
)
|
return GoogleVertexEmbeddingFunction(
|
||||||
|
model_name=model_name,
|
||||||
|
api_key=config.get("api_key"),
|
||||||
|
project_id=config.get("project_id"),
|
||||||
|
region=config.get("region"),
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import GoogleVertexEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_google(config, model_name):
|
def _configure_google(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.google_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
GoogleGenerativeAiEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return GoogleGenerativeAiEmbeddingFunction(
|
try:
|
||||||
model_name=model_name,
|
from chromadb.utils.embedding_functions.google_embedding_function import (
|
||||||
api_key=config.get("api_key"),
|
GoogleGenerativeAiEmbeddingFunction,
|
||||||
task_type=config.get("task_type"),
|
)
|
||||||
)
|
|
||||||
|
return GoogleGenerativeAiEmbeddingFunction(
|
||||||
|
model_name=model_name,
|
||||||
|
api_key=config.get("api_key"),
|
||||||
|
task_type=config.get("task_type"),
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import GoogleGenerativeAiEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_cohere(config, model_name):
|
def _configure_cohere(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.cohere_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
CohereEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return CohereEmbeddingFunction(
|
try:
|
||||||
model_name=model_name,
|
from chromadb.utils.embedding_functions.cohere_embedding_function import (
|
||||||
api_key=config.get("api_key"),
|
CohereEmbeddingFunction,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
return CohereEmbeddingFunction(
|
||||||
|
model_name=model_name,
|
||||||
|
api_key=config.get("api_key"),
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import CohereEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_voyageai(config, model_name):
|
def _configure_voyageai(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.voyageai_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
VoyageAIEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return VoyageAIEmbeddingFunction(
|
try:
|
||||||
model_name=model_name,
|
from chromadb.utils.embedding_functions.voyageai_embedding_function import (
|
||||||
api_key=config.get("api_key"),
|
VoyageAIEmbeddingFunction,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
return VoyageAIEmbeddingFunction(
|
||||||
|
model_name=model_name,
|
||||||
|
api_key=config.get("api_key"),
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import VoyageAIEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_bedrock(config, model_name):
|
def _configure_bedrock(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
AmazonBedrockEmbeddingFunction,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
# Allow custom model_name override with backwards compatibility
|
try:
|
||||||
kwargs = {"session": config.get("session")}
|
from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (
|
||||||
if model_name is not None:
|
AmazonBedrockEmbeddingFunction,
|
||||||
kwargs["model_name"] = model_name
|
)
|
||||||
return AmazonBedrockEmbeddingFunction(**kwargs)
|
|
||||||
|
# Allow custom model_name override with backwards compatibility
|
||||||
|
kwargs = {"session": config.get("session")}
|
||||||
|
if model_name is not None:
|
||||||
|
kwargs["model_name"] = model_name
|
||||||
|
return AmazonBedrockEmbeddingFunction(**kwargs)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import AmazonBedrockEmbeddingFunction: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_huggingface(config, model_name):
|
def _configure_huggingface(config, model_name):
|
||||||
from chromadb.utils.embedding_functions.huggingface_embedding_function import (
|
if not CHROMADB_AVAILABLE:
|
||||||
HuggingFaceEmbeddingServer,
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
|
|
||||||
return HuggingFaceEmbeddingServer(
|
try:
|
||||||
url=config.get("api_url"),
|
from chromadb.utils.embedding_functions.huggingface_embedding_function import (
|
||||||
)
|
HuggingFaceEmbeddingServer,
|
||||||
|
)
|
||||||
|
|
||||||
|
return HuggingFaceEmbeddingServer(
|
||||||
|
url=config.get("api_url"),
|
||||||
|
)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Failed to import HuggingFaceEmbeddingServer: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_watson(config, model_name):
|
def _configure_watson(config, model_name):
|
||||||
|
if not CHROMADB_AVAILABLE:
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import ibm_watsonx_ai.foundation_models as watson_models
|
import ibm_watsonx_ai.foundation_models as watson_models
|
||||||
from ibm_watsonx_ai import Credentials
|
from ibm_watsonx_ai import Credentials
|
||||||
from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames as EmbedParams
|
from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames as EmbedParams
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError(
|
warnings.warn(
|
||||||
"IBM Watson dependencies are not installed. Please install them to use Watson embedding."
|
"IBM Watson dependencies are not installed. Please install them to use Watson embedding."
|
||||||
) from e
|
)
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
class WatsonEmbeddingFunction(EmbeddingFunction):
|
class WatsonEmbeddingFunction(EmbeddingFunction):
|
||||||
def __call__(self, input: Documents) -> Embeddings:
|
def __call__(self, input: Documents) -> Embeddings:
|
||||||
@@ -212,25 +334,30 @@ class EmbeddingConfigurator:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _configure_custom(config):
|
def _configure_custom(config):
|
||||||
|
if not CHROMADB_AVAILABLE:
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|
||||||
custom_embedder = config.get("embedder")
|
custom_embedder = config.get("embedder")
|
||||||
if isinstance(custom_embedder, EmbeddingFunction):
|
if isinstance(custom_embedder, EmbeddingFunction):
|
||||||
try:
|
try:
|
||||||
validate_embedding_function(custom_embedder)
|
validate_embedding_function(custom_embedder)
|
||||||
return custom_embedder
|
return custom_embedder
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Invalid custom embedding function: {str(e)}")
|
warnings.warn(f"Invalid custom embedding function: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
elif callable(custom_embedder):
|
elif callable(custom_embedder):
|
||||||
try:
|
try:
|
||||||
instance = custom_embedder()
|
instance = custom_embedder()
|
||||||
if isinstance(instance, EmbeddingFunction):
|
if isinstance(instance, EmbeddingFunction):
|
||||||
validate_embedding_function(instance)
|
validate_embedding_function(instance)
|
||||||
return instance
|
return instance
|
||||||
raise ValueError(
|
warnings.warn("Custom embedder does not create an EmbeddingFunction instance")
|
||||||
"Custom embedder does not create an EmbeddingFunction instance"
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Error instantiating custom embedder: {str(e)}")
|
warnings.warn(f"Error instantiating custom embedder: {str(e)}")
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
warnings.warn(
|
||||||
"Custom embedder must be an instance of `EmbeddingFunction` or a callable that creates one"
|
"Custom embedder must be an instance of `EmbeddingFunction` or a callable that creates one"
|
||||||
)
|
)
|
||||||
|
return EmbeddingConfigurator._create_unavailable_embedding_function()
|
||||||
|
|||||||
64
tests/test_numpy_compatibility.py
Normal file
64
tests/test_numpy_compatibility.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import importlib
|
||||||
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def test_crew_import_with_numpy():
|
||||||
|
"""Test that crewai can be imported even with NumPy compatibility issues."""
|
||||||
|
try:
|
||||||
|
# Force reload to ensure we test our fix
|
||||||
|
if "crewai" in sys.modules:
|
||||||
|
importlib.reload(sys.modules["crewai"])
|
||||||
|
|
||||||
|
# This should not raise an exception
|
||||||
|
from crewai import Crew
|
||||||
|
assert Crew is not None
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Failed to import Crew: {e}")
|
||||||
|
|
||||||
|
def test_embedding_configurator_with_numpy():
|
||||||
|
"""Test that EmbeddingConfigurator can be imported with NumPy."""
|
||||||
|
try:
|
||||||
|
# Force reload
|
||||||
|
if "crewai.utilities.embedding_configurator" in sys.modules:
|
||||||
|
importlib.reload(sys.modules["crewai.utilities.embedding_configurator"])
|
||||||
|
|
||||||
|
from crewai.utilities.embedding_configurator import EmbeddingConfigurator
|
||||||
|
configurator = EmbeddingConfigurator()
|
||||||
|
# Test that we can create an embedder (might be unavailable but shouldn't crash)
|
||||||
|
embedder = configurator.configure_embedder()
|
||||||
|
assert embedder is not None
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Failed to use EmbeddingConfigurator: {e}")
|
||||||
|
|
||||||
|
def test_rag_storage_with_numpy():
|
||||||
|
"""Test that RAGStorage can be imported and used with NumPy."""
|
||||||
|
try:
|
||||||
|
# Force reload
|
||||||
|
if "crewai.memory.storage.rag_storage" in sys.modules:
|
||||||
|
importlib.reload(sys.modules["crewai.memory.storage.rag_storage"])
|
||||||
|
|
||||||
|
from crewai.memory.storage.rag_storage import RAGStorage
|
||||||
|
# Initialize with minimal config to avoid actual DB operations
|
||||||
|
storage = RAGStorage(type="test", crew=None)
|
||||||
|
# Just verify we can create the object without errors
|
||||||
|
assert storage is not None
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Failed to use RAGStorage: {e}")
|
||||||
|
|
||||||
|
def test_knowledge_storage_with_numpy():
|
||||||
|
"""Test that KnowledgeStorage can be imported and used with NumPy."""
|
||||||
|
try:
|
||||||
|
# Force reload
|
||||||
|
if "crewai.knowledge.storage.knowledge_storage" in sys.modules:
|
||||||
|
importlib.reload(sys.modules["crewai.knowledge.storage.knowledge_storage"])
|
||||||
|
|
||||||
|
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||||
|
# Initialize with minimal config
|
||||||
|
storage = KnowledgeStorage()
|
||||||
|
# Just verify we can create the object without errors
|
||||||
|
assert storage is not None
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Failed to use KnowledgeStorage: {e}")
|
||||||
Reference in New Issue
Block a user