From ce5ea9be6f9147d2a2e56beeb9cabbdce53cd903 Mon Sep 17 00:00:00 2001
From: Greyson LaLonde
Date: Thu, 25 Sep 2025 18:28:39 -0400
Subject: [PATCH] feat: add custom embedding types and migrate providers

- introduce BaseEmbeddingsProvider and helper for embedding functions
- add core embedding types and migrate providers, factory, and storage modules
- remove unused type aliases and fix pydantic schema error
- update providers with env var support and related fixes
---
 pyproject.toml | 9 +
 .../knowledge/storage/knowledge_storage.py | 54 +-
 src/crewai/memory/storage/rag_storage.py | 21 +-
 .../rag/core/base_embeddings_callable.py | 142 ++++
 .../rag/core/base_embeddings_provider.py | 23 +
 src/crewai/rag/core/types.py | 28 +
 src/crewai/rag/embeddings/configurator.py | 245 ------
 src/crewai/rag/embeddings/factory.py | 572 ++++++++------
 .../rag/embeddings/providers/__init__.py | 1 +
 .../rag/embeddings/providers/aws/__init__.py | 13 +
 .../rag/embeddings/providers/aws/bedrock.py | 58 ++
 .../rag/embeddings/providers/aws/types.py | 17 +
 .../embeddings/providers/cohere/__init__.py | 13 +
 .../providers/cohere/cohere_provider.py | 24 +
 .../rag/embeddings/providers/cohere/types.py | 17 +
 .../embeddings/providers/custom/__init__.py | 13 +
 .../providers/custom/custom_provider.py | 19 +
 .../providers/custom/embedding_callable.py | 22 +
 .../rag/embeddings/providers/custom/types.py | 18 +
 .../embeddings/providers/google/__init__.py | 23 +
 .../providers/google/generative_ai.py | 30 +
 .../rag/embeddings/providers/google/types.py | 34 +
 .../rag/embeddings/providers/google/vertex.py | 35 +
 .../providers/huggingface/__init__.py | 15 +
 .../huggingface/huggingface_provider.py | 20 +
 .../embeddings/providers/huggingface/types.py | 16 +
 .../rag/embeddings/providers/ibm/__init__.py | 15 +
 .../providers/ibm/embedding_callable.py | 144 ++++
 .../rag/embeddings/providers/ibm/types.py | 42 ++
 .../rag/embeddings/providers/ibm/watson.py | 126 ++++
 .../providers/instructor/__init__.py | 15 +
 .../instructor/instructor_provider.py | 32 +
 .../embeddings/providers/instructor/types.py | 18 +
 .../rag/embeddings/providers/jina/__init__.py | 13 +
 .../providers/jina/jina_provider.py | 22 +
 .../rag/embeddings/providers/jina/types.py | 17 +
 .../providers/microsoft/__init__.py | 15 +
 .../embeddings/providers/microsoft/azure.py | 58 ++
 .../embeddings/providers/microsoft/types.py | 24 +
 .../embeddings/providers/ollama/__init__.py | 15 +
 .../providers/ollama/ollama_provider.py | 25 +
 .../rag/embeddings/providers/ollama/types.py | 17 +
 .../rag/embeddings/providers/onnx/__init__.py | 13 +
 .../providers/onnx/onnx_provider.py | 19 +
 .../rag/embeddings/providers/onnx/types.py | 16 +
 .../embeddings/providers/openai/__init__.py | 15 +
 .../providers/openai/openai_provider.py | 58 ++
 .../rag/embeddings/providers/openai/types.py | 24 +
 .../embeddings/providers/openclip/__init__.py | 15 +
 .../providers/openclip/openclip_provider.py | 32 +
 .../embeddings/providers/openclip/types.py | 18 +
 .../embeddings/providers/roboflow/__init__.py | 15 +
 .../providers/roboflow/roboflow_provider.py | 25 +
 .../embeddings/providers/roboflow/types.py | 17 +
 .../sentence_transformer/__init__.py | 15 +
 .../sentence_transformer_provider.py | 34 +
 .../providers/sentence_transformer/types.py | 18 +
 .../embeddings/providers/text2vec/__init__.py | 15 +
 .../providers/text2vec/text2vec_provider.py | 22 +
 .../embeddings/providers/text2vec/types.py | 16 +
 .../embeddings/providers/voyageai/__init__.py | 15 +
 .../providers/voyageai/embedding_callable.py | 50 ++
 .../embeddings/providers/voyageai/types.py | 23 +
 .../providers/voyageai/voyageai_provider.py | 55 ++
 src/crewai/rag/embeddings/types.py | 106 +--
 src/crewai/rag/storage/base_rag_storage.py | 6 +-
 src/crewai/rag/types.py | 3 +-
 .../test_knowledge_storage_integration.py | 4 +-
 .../rag/embeddings/test_embedding_factory.py | 696 +++++-------
 tests/rag/embeddings/test_factory_azure.py | 155 ++--
 tests/rag/test_error_handling.py | 2 +-
 tests/utilities/test_azure_embedder_config.py | 82 ---
 .../utilities/test_embedding_configuration.py | 25 -
 uv.lock | 386 +++++++++-
 74 files changed, 2767 insertions(+), 1308 deletions(-)
 create mode 100644 src/crewai/rag/core/base_embeddings_callable.py
 create mode 100644 src/crewai/rag/core/base_embeddings_provider.py
 create mode 100644 src/crewai/rag/core/types.py
 delete mode 100644 src/crewai/rag/embeddings/configurator.py
 create mode 100644 src/crewai/rag/embeddings/providers/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/aws/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/aws/bedrock.py
 create mode 100644 src/crewai/rag/embeddings/providers/aws/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/cohere/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/cohere/cohere_provider.py
 create mode 100644 src/crewai/rag/embeddings/providers/cohere/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/custom/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/custom/custom_provider.py
 create mode 100644 src/crewai/rag/embeddings/providers/custom/embedding_callable.py
 create mode 100644 src/crewai/rag/embeddings/providers/custom/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/google/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/google/generative_ai.py
 create mode 100644 src/crewai/rag/embeddings/providers/google/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/google/vertex.py
 create mode 100644 src/crewai/rag/embeddings/providers/huggingface/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/huggingface/huggingface_provider.py
 create mode 100644 src/crewai/rag/embeddings/providers/huggingface/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/ibm/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/ibm/embedding_callable.py
 create mode 100644 src/crewai/rag/embeddings/providers/ibm/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/ibm/watson.py
 create mode 100644 src/crewai/rag/embeddings/providers/instructor/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/instructor/instructor_provider.py
 create mode 100644 src/crewai/rag/embeddings/providers/instructor/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/jina/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/jina/jina_provider.py
 create mode 100644 src/crewai/rag/embeddings/providers/jina/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/microsoft/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/microsoft/azure.py
 create mode 100644 src/crewai/rag/embeddings/providers/microsoft/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/ollama/__init__.py
 create mode 100644 src/crewai/rag/embeddings/providers/ollama/ollama_provider.py
 create mode 100644 src/crewai/rag/embeddings/providers/ollama/types.py
 create mode 100644 src/crewai/rag/embeddings/providers/onnx/__init__.py
 create mode 100644
src/crewai/rag/embeddings/providers/onnx/onnx_provider.py create mode 100644 src/crewai/rag/embeddings/providers/onnx/types.py create mode 100644 src/crewai/rag/embeddings/providers/openai/__init__.py create mode 100644 src/crewai/rag/embeddings/providers/openai/openai_provider.py create mode 100644 src/crewai/rag/embeddings/providers/openai/types.py create mode 100644 src/crewai/rag/embeddings/providers/openclip/__init__.py create mode 100644 src/crewai/rag/embeddings/providers/openclip/openclip_provider.py create mode 100644 src/crewai/rag/embeddings/providers/openclip/types.py create mode 100644 src/crewai/rag/embeddings/providers/roboflow/__init__.py create mode 100644 src/crewai/rag/embeddings/providers/roboflow/roboflow_provider.py create mode 100644 src/crewai/rag/embeddings/providers/roboflow/types.py create mode 100644 src/crewai/rag/embeddings/providers/sentence_transformer/__init__.py create mode 100644 src/crewai/rag/embeddings/providers/sentence_transformer/sentence_transformer_provider.py create mode 100644 src/crewai/rag/embeddings/providers/sentence_transformer/types.py create mode 100644 src/crewai/rag/embeddings/providers/text2vec/__init__.py create mode 100644 src/crewai/rag/embeddings/providers/text2vec/text2vec_provider.py create mode 100644 src/crewai/rag/embeddings/providers/text2vec/types.py create mode 100644 src/crewai/rag/embeddings/providers/voyageai/__init__.py create mode 100644 src/crewai/rag/embeddings/providers/voyageai/embedding_callable.py create mode 100644 src/crewai/rag/embeddings/providers/voyageai/types.py create mode 100644 src/crewai/rag/embeddings/providers/voyageai/voyageai_provider.py delete mode 100644 tests/utilities/test_azure_embedder_config.py delete mode 100644 tests/utilities/test_embedding_configuration.py diff --git a/pyproject.toml b/pyproject.toml index fc403fdb6..57eb32625 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,15 @@ aisuite = [ qdrant = [ "qdrant-client[fastembed]>=1.14.3", ] +aws = [ + "boto3>=1.40.38", +] +watson = [ + "ibm-watsonx-ai>=1.3.39", +] +voyageai = [ + "voyageai>=0.3.5", +] [dependency-groups] dev = [ diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py index a526ec98b..c06f513b7 100644 --- a/src/crewai/knowledge/storage/knowledge_storage.py +++ b/src/crewai/knowledge/storage/knowledge_storage.py @@ -8,7 +8,9 @@ from crewai.rag.chromadb.config import ChromaDBConfig from crewai.rag.chromadb.types import ChromaEmbeddingFunctionWrapper from crewai.rag.config.utils import get_rag_client from crewai.rag.core.base_client import BaseClient -from crewai.rag.embeddings.factory import EmbedderConfig, get_embedding_function +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider +from crewai.rag.embeddings.factory import build_embedder +from crewai.rag.embeddings.types import ProviderSpec from crewai.rag.factory import create_client from crewai.rag.types import BaseRecord, SearchResult from crewai.utilities.logger import Logger @@ -22,12 +24,11 @@ class KnowledgeStorage(BaseKnowledgeStorage): def __init__( self, - embedder: dict[str, Any] | None = None, + embedder: ProviderSpec | BaseEmbeddingsProvider | None = None, collection_name: str | None = None, ) -> None: self.collection_name = collection_name self._client: BaseClient | None = None - self._embedder_config = embedder # Store embedder config warnings.filterwarnings( "ignore", @@ -36,29 +37,12 @@ class KnowledgeStorage(BaseKnowledgeStorage): ) if embedder: - # Cast to 
EmbedderConfig for type checking - embedder_typed = cast(EmbedderConfig, embedder) - embedding_function = get_embedding_function(embedder_typed) - batch_size = None - if isinstance(embedder, dict) and "config" in embedder: - nested_config = embedder["config"] - if isinstance(nested_config, dict): - batch_size = nested_config.get("batch_size") - - # Create config with batch_size if provided - if batch_size is not None: - config = ChromaDBConfig( - embedding_function=cast( - ChromaEmbeddingFunctionWrapper, embedding_function - ), - batch_size=batch_size, - ) - else: - config = ChromaDBConfig( - embedding_function=cast( - ChromaEmbeddingFunctionWrapper, embedding_function - ) + embedding_function = build_embedder(embedder) + config = ChromaDBConfig( + embedding_function=cast( + ChromaEmbeddingFunctionWrapper, embedding_function ) + ) self._client = create_client(config) def _get_client(self) -> BaseClient: @@ -123,23 +107,9 @@ class KnowledgeStorage(BaseKnowledgeStorage): rag_documents: list[BaseRecord] = [{"content": doc} for doc in documents] - batch_size = None - if self._embedder_config and isinstance(self._embedder_config, dict): - if "config" in self._embedder_config: - nested_config = self._embedder_config["config"] - if isinstance(nested_config, dict): - batch_size = nested_config.get("batch_size") - - if batch_size is not None: - client.add_documents( - collection_name=collection_name, - documents=rag_documents, - batch_size=batch_size, - ) - else: - client.add_documents( - collection_name=collection_name, documents=rag_documents - ) + client.add_documents( + collection_name=collection_name, documents=rag_documents + ) except Exception as e: if "dimension mismatch" in str(e).lower(): Logger(verbose=True).log( diff --git a/src/crewai/memory/storage/rag_storage.py b/src/crewai/memory/storage/rag_storage.py index a0e08d4dc..f3c49d229 100644 --- a/src/crewai/memory/storage/rag_storage.py +++ b/src/crewai/memory/storage/rag_storage.py @@ -7,8 +7,9 @@ from crewai.rag.chromadb.config import ChromaDBConfig from crewai.rag.chromadb.types import ChromaEmbeddingFunctionWrapper from crewai.rag.config.utils import get_rag_client from crewai.rag.core.base_client import BaseClient -from crewai.rag.embeddings.factory import EmbedderConfig, get_embedding_function -from crewai.rag.embeddings.types import EmbeddingOptions +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider +from crewai.rag.embeddings.factory import build_embedder +from crewai.rag.embeddings.types import ProviderSpec from crewai.rag.factory import create_client from crewai.rag.storage.base_rag_storage import BaseRAGStorage from crewai.rag.types import BaseRecord @@ -26,7 +27,7 @@ class RAGStorage(BaseRAGStorage): self, type: str, allow_reset: bool = True, - embedder_config: EmbeddingOptions | EmbedderConfig | None = None, + embedder_config: ProviderSpec | BaseEmbeddingsProvider | None = None, crew: Any = None, path: str | None = None, ) -> None: @@ -50,15 +51,17 @@ class RAGStorage(BaseRAGStorage): ) if self.embedder_config: - embedding_function = get_embedding_function(self.embedder_config) + embedding_function = build_embedder(self.embedder_config) try: _ = embedding_function(["test"]) except Exception as e: provider = ( - self.embedder_config.provider - if isinstance(self.embedder_config, EmbeddingOptions) - else self.embedder_config.get("provider", "unknown") + self.embedder_config["provider"] + if isinstance(self.embedder_config, dict) + else self.embedder_config.__class__.__name__.replace( + "Provider", "" 
+ ).lower() ) raise ValueError( f"Failed to initialize embedder. Please check your configuration or connection.\n" @@ -80,7 +83,7 @@ class RAGStorage(BaseRAGStorage): embedding_function=cast( ChromaEmbeddingFunctionWrapper, embedding_function ), - batch_size=batch_size, + batch_size=cast(int, batch_size), ) else: config = ChromaDBConfig( @@ -142,7 +145,7 @@ class RAGStorage(BaseRAGStorage): client.add_documents( collection_name=collection_name, documents=[document], - batch_size=batch_size, + batch_size=cast(int, batch_size), ) else: client.add_documents( diff --git a/src/crewai/rag/core/base_embeddings_callable.py b/src/crewai/rag/core/base_embeddings_callable.py new file mode 100644 index 000000000..090a3d026 --- /dev/null +++ b/src/crewai/rag/core/base_embeddings_callable.py @@ -0,0 +1,142 @@ +"""Base embeddings callable utilities for RAG systems.""" + +from typing import Protocol, TypeVar, runtime_checkable + +import numpy as np + +from crewai.rag.core.types import ( + Embeddable, + Embedding, + Embeddings, + PyEmbedding, +) + +T = TypeVar("T") +D = TypeVar("D", bound=Embeddable, contravariant=True) + + +def normalize_embeddings( + target: Embedding | list[Embedding] | PyEmbedding | list[PyEmbedding], +) -> Embeddings | None: + """Normalize various embedding formats to a standard list of numpy arrays. + + Args: + target: Input embeddings in various formats (list of floats, list of lists, + numpy array, or list of numpy arrays). + + Returns: + Normalized embeddings as a list of numpy arrays, or None if input is None. + + Raises: + ValueError: If embeddings are empty or in an unsupported format. + """ + if isinstance(target, np.ndarray): + if target.ndim == 1: + return [target.astype(np.float32)] + if target.ndim == 2: + return [row.astype(np.float32) for row in target] + raise ValueError(f"Unsupported numpy array shape: {target.shape}") + + first = target[0] + if isinstance(first, (int, float)) and not isinstance(first, bool): + return [np.array(target, dtype=np.float32)] + if isinstance(first, list): + return [np.array(emb, dtype=np.float32) for emb in target] + if isinstance(first, np.ndarray): + return [emb.astype(np.float32) for emb in target] # type: ignore[union-attr] + + raise ValueError(f"Unsupported embeddings format: {type(first)}") + + +def maybe_cast_one_to_many(target: T | list[T] | None) -> list[T] | None: + """Cast a single item to a list if needed. + + Args: + target: A single item or list of items. + + Returns: + A list of items or None if input is None. + """ + if target is None: + return None + return target if isinstance(target, list) else [target] + + +def validate_embeddings(embeddings: Embeddings) -> Embeddings: + """Validate embeddings format and content. + + Args: + embeddings: List of numpy arrays to validate. + + Returns: + Validated embeddings. + + Raises: + ValueError: If embeddings format or content is invalid. 
+ """ + if not isinstance(embeddings, list): + raise ValueError( + f"Expected embeddings to be a list, got {type(embeddings).__name__}" + ) + if len(embeddings) == 0: + raise ValueError( + f"Expected embeddings to be a list with at least one item, got {len(embeddings)} embeddings" + ) + if not all(isinstance(e, np.ndarray) for e in embeddings): + raise ValueError( + "Expected each embedding in the embeddings to be a numpy array" + ) + for i, embedding in enumerate(embeddings): + if embedding.ndim == 0: + raise ValueError( + f"Expected a 1-dimensional array, got a 0-dimensional array {embedding}" + ) + if embedding.size == 0: + raise ValueError( + f"Expected each embedding to be a 1-dimensional numpy array with at least 1 value. " + f"Got an array with no values at position {i}" + ) + if not all( + isinstance(value, (np.integer, float, np.floating)) + and not isinstance(value, bool) + for value in embedding + ): + raise ValueError( + f"Expected embedding to contain numeric values, got non-numeric values at position {i}" + ) + return embeddings + + +@runtime_checkable +class EmbeddingFunction(Protocol[D]): + """Protocol for embedding functions. + + Embedding functions convert input data (documents or images) into vector embeddings. + """ + + def __call__(self, input: D) -> Embeddings: + """Convert input data to embeddings. + + Args: + input: Input data to embed (documents or images). + + Returns: + List of numpy arrays representing the embeddings. + """ + ... + + def __init_subclass__(cls) -> None: + """Wrap __call__ method to normalize and validate embeddings.""" + super().__init_subclass__() + original_call = cls.__call__ + + def wrapped_call(self: EmbeddingFunction[D], input: D) -> Embeddings: + result = original_call(self, input) + if result is None: + raise ValueError("Embedding function returned None") + normalized = normalize_embeddings(result) + if normalized is None: + raise ValueError("Normalization returned None for non-None input") + return validate_embeddings(normalized) + + cls.__call__ = wrapped_call # type: ignore[method-assign] diff --git a/src/crewai/rag/core/base_embeddings_provider.py b/src/crewai/rag/core/base_embeddings_provider.py new file mode 100644 index 000000000..d93b575a0 --- /dev/null +++ b/src/crewai/rag/core/base_embeddings_provider.py @@ -0,0 +1,23 @@ +"""Base class for embedding providers.""" + +from typing import Generic, TypeVar + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + +from crewai.rag.core.base_embeddings_callable import EmbeddingFunction + +T = TypeVar("T", bound=EmbeddingFunction) + + +class BaseEmbeddingsProvider(BaseSettings, Generic[T]): + """Abstract base class for embedding providers. + + This class provides a common interface for dynamically loading and building + embedding functions from various providers. 
+ """ + + model_config = SettingsConfigDict(extra="allow", populate_by_name=True) + embedding_callable: type[T] = Field( + ..., description="The embedding function class to use" + ) diff --git a/src/crewai/rag/core/types.py b/src/crewai/rag/core/types.py new file mode 100644 index 000000000..d94ef777c --- /dev/null +++ b/src/crewai/rag/core/types.py @@ -0,0 +1,28 @@ +"""Core type definitions for RAG systems.""" + +from collections.abc import Sequence +from typing import TypeVar + +import numpy as np +from numpy import floating, integer, number +from numpy.typing import NDArray + +T = TypeVar("T") + +PyEmbedding = Sequence[float] | Sequence[int] +PyEmbeddings = list[PyEmbedding] +Embedding = NDArray[np.int32 | np.float32] +Embeddings = list[Embedding] + +Documents = list[str] +Images = list[np.ndarray] +Embeddable = Documents | Images + +ScalarType = TypeVar("ScalarType", bound=np.generic) +IntegerType = TypeVar("IntegerType", bound=integer) +FloatingType = TypeVar("FloatingType", bound=floating) +NumberType = TypeVar("NumberType", bound=number) + +DType32 = TypeVar("DType32", np.int32, np.float32) +DType64 = TypeVar("DType64", np.int64, np.float64) +DTypeCommon = TypeVar("DTypeCommon", np.int32, np.int64, np.float32, np.float64) diff --git a/src/crewai/rag/embeddings/configurator.py b/src/crewai/rag/embeddings/configurator.py deleted file mode 100644 index ce8e8181d..000000000 --- a/src/crewai/rag/embeddings/configurator.py +++ /dev/null @@ -1,245 +0,0 @@ -import os -from typing import Any, cast - -from chromadb import Documents, EmbeddingFunction, Embeddings -from chromadb.api.types import validate_embedding_function - - -class EmbeddingConfigurator: - def __init__(self): - self.embedding_functions = { - "openai": self._configure_openai, - "azure": self._configure_azure, - "ollama": self._configure_ollama, - "vertexai": self._configure_vertexai, - "google": self._configure_google, - "cohere": self._configure_cohere, - "voyageai": self._configure_voyageai, - "bedrock": self._configure_bedrock, - "huggingface": self._configure_huggingface, - "watson": self._configure_watson, - "custom": self._configure_custom, - } - - def configure_embedder( - self, - embedder_config: dict[str, Any] | None = None, - ) -> EmbeddingFunction: - """Configures and returns an embedding function based on the provided config.""" - if embedder_config is None: - return self._create_default_embedding_function() - - provider = embedder_config.get("provider") - config = embedder_config.get("config", {}) - model_name = config.get("model") if provider != "custom" else None - - if provider not in self.embedding_functions: - raise Exception( - f"Unsupported embedding provider: {provider}, supported providers: {list(self.embedding_functions.keys())}" - ) - - try: - embedding_function = self.embedding_functions[provider] - except ImportError as e: - missing_package = str(e).split()[-1] - raise ImportError( - f"{missing_package} is not installed. 
Please install it with: pip install {missing_package}" - ) from e - - return ( - embedding_function(config) - if provider == "custom" - else embedding_function(config, model_name) - ) - - @staticmethod - def _create_default_embedding_function(): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, - ) - - return OpenAIEmbeddingFunction( - api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" - ) - - @staticmethod - def _configure_openai(config, model_name): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, - ) - - return OpenAIEmbeddingFunction( - api_key=config.get("api_key") or os.getenv("OPENAI_API_KEY"), - model_name=model_name, - api_base=config.get("api_base", None), - api_type=config.get("api_type", None), - api_version=config.get("api_version", None), - default_headers=config.get("default_headers", None), - dimensions=config.get("dimensions", None), - deployment_id=config.get("deployment_id", None), - organization_id=config.get("organization_id", None), - ) - - @staticmethod - def _configure_azure(config, model_name): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, - ) - - return OpenAIEmbeddingFunction( - api_key=config.get("api_key"), - api_base=config.get("api_base"), - api_type=config.get("api_type", "azure"), - api_version=config.get("api_version"), - model_name=model_name, - default_headers=config.get("default_headers"), - dimensions=config.get("dimensions"), - deployment_id=config.get("deployment_id"), - organization_id=config.get("organization_id"), - ) - - @staticmethod - def _configure_ollama(config, model_name): - from chromadb.utils.embedding_functions.ollama_embedding_function import ( - OllamaEmbeddingFunction, - ) - - return OllamaEmbeddingFunction( - url=config.get("url", "http://localhost:11434/api/embeddings"), - model_name=model_name, - ) - - @staticmethod - def _configure_vertexai(config, model_name): - from chromadb.utils.embedding_functions.google_embedding_function import ( - GoogleVertexEmbeddingFunction, - ) - - return GoogleVertexEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - project_id=config.get("project_id"), - region=config.get("region"), - ) - - @staticmethod - def _configure_google(config, model_name): - from chromadb.utils.embedding_functions.google_embedding_function import ( - GoogleGenerativeAiEmbeddingFunction, - ) - - return GoogleGenerativeAiEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - task_type=config.get("task_type"), - ) - - @staticmethod - def _configure_cohere(config, model_name): - from chromadb.utils.embedding_functions.cohere_embedding_function import ( - CohereEmbeddingFunction, - ) - - return CohereEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - ) - - @staticmethod - def _configure_voyageai(config, model_name): - from chromadb.utils.embedding_functions.voyageai_embedding_function import ( # type: ignore[import-not-found] - VoyageAIEmbeddingFunction, - ) - - return VoyageAIEmbeddingFunction( - model_name=model_name, - api_key=config.get("api_key"), - ) - - @staticmethod - def _configure_bedrock(config, model_name): - from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import ( - AmazonBedrockEmbeddingFunction, - ) - - # Allow custom model_name override with backwards compatibility - kwargs = {"session": config.get("session")} - if model_name 
is not None: - kwargs["model_name"] = model_name - return AmazonBedrockEmbeddingFunction(**kwargs) - - @staticmethod - def _configure_huggingface(config, model_name): - from chromadb.utils.embedding_functions.huggingface_embedding_function import ( - HuggingFaceEmbeddingServer, - ) - - return HuggingFaceEmbeddingServer( - url=config.get("api_url"), - ) - - @staticmethod - def _configure_watson(config, model_name): - try: - import ibm_watsonx_ai.foundation_models as watson_models # type: ignore[import-not-found] - from ibm_watsonx_ai import Credentials # type: ignore[import-not-found] - from ibm_watsonx_ai.metanames import ( # type: ignore[import-not-found] - EmbedTextParamsMetaNames as EmbedParams, - ) - except ImportError as e: - raise ImportError( - "IBM Watson dependencies are not installed. Please install them to use Watson embedding." - ) from e - - class WatsonEmbeddingFunction(EmbeddingFunction): - def __call__(self, input: Documents) -> Embeddings: - if isinstance(input, str): - input = [input] - - embed_params = { - EmbedParams.TRUNCATE_INPUT_TOKENS: 3, - EmbedParams.RETURN_OPTIONS: {"input_text": True}, - } - - embedding = watson_models.Embeddings( - model_id=config.get("model"), - params=embed_params, - credentials=Credentials( - api_key=config.get("api_key"), url=config.get("api_url") - ), - project_id=config.get("project_id"), - ) - - try: - embeddings = embedding.embed_documents(input) - return cast(Embeddings, embeddings) - except Exception as e: - print("Error during Watson embedding:", e) - raise e - - return WatsonEmbeddingFunction() - - @staticmethod - def _configure_custom(config): - custom_embedder = config.get("embedder") - if isinstance(custom_embedder, EmbeddingFunction): - try: - validate_embedding_function(custom_embedder) - return custom_embedder - except Exception as e: - raise ValueError(f"Invalid custom embedding function: {e!s}") from e - elif callable(custom_embedder): - try: - instance = custom_embedder() - if isinstance(instance, EmbeddingFunction): - validate_embedding_function(instance) - return instance - raise ValueError( - "Custom embedder does not create an EmbeddingFunction instance" - ) - except Exception as e: - raise ValueError(f"Error instantiating custom embedder: {e!s}") from e - else: - raise ValueError( - "Custom embedder must be an instance of `EmbeddingFunction` or a callable that creates one" - ) diff --git a/src/crewai/rag/embeddings/factory.py b/src/crewai/rag/embeddings/factory.py index cc756f314..180ccc8fd 100644 --- a/src/crewai/rag/embeddings/factory.py +++ b/src/crewai/rag/embeddings/factory.py @@ -1,249 +1,363 @@ -"""Minimal embedding function factory for CrewAI.""" +"""Factory functions for creating embedding providers and functions.""" -import os -from collections.abc import Callable, MutableMapping -from typing import Any, Final, Literal, TypedDict +from __future__ import annotations -from chromadb import EmbeddingFunction -from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import ( - AmazonBedrockEmbeddingFunction, -) -from chromadb.utils.embedding_functions.cohere_embedding_function import ( - CohereEmbeddingFunction, -) -from chromadb.utils.embedding_functions.google_embedding_function import ( - GoogleGenerativeAiEmbeddingFunction, - GooglePalmEmbeddingFunction, - GoogleVertexEmbeddingFunction, -) -from chromadb.utils.embedding_functions.huggingface_embedding_function import ( - HuggingFaceEmbeddingFunction, -) -from chromadb.utils.embedding_functions.instructor_embedding_function import ( - 
InstructorEmbeddingFunction, -) -from chromadb.utils.embedding_functions.jina_embedding_function import ( - JinaEmbeddingFunction, -) -from chromadb.utils.embedding_functions.ollama_embedding_function import ( - OllamaEmbeddingFunction, -) -from chromadb.utils.embedding_functions.onnx_mini_lm_l6_v2 import ONNXMiniLM_L6_V2 -from chromadb.utils.embedding_functions.open_clip_embedding_function import ( - OpenCLIPEmbeddingFunction, -) -from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, -) -from chromadb.utils.embedding_functions.roboflow_embedding_function import ( - RoboflowEmbeddingFunction, -) -from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import ( - SentenceTransformerEmbeddingFunction, -) -from chromadb.utils.embedding_functions.text2vec_embedding_function import ( - Text2VecEmbeddingFunction, -) -from typing_extensions import NotRequired +from typing import TYPE_CHECKING, TypeVar, overload -from crewai.rag.embeddings.types import EmbeddingOptions +from crewai.rag.core.base_embeddings_callable import EmbeddingFunction +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider +from crewai.utilities.import_utils import import_and_validate_definition -AllowedEmbeddingProviders = Literal[ - "openai", - "cohere", - "ollama", - "huggingface", - "sentence-transformer", - "instructor", - "google-palm", - "google-generativeai", - "google-vertex", - "amazon-bedrock", - "jina", - "roboflow", - "openclip", - "text2vec", - "onnx", -] +if TYPE_CHECKING: + from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import ( + AmazonBedrockEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.cohere_embedding_function import ( + CohereEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.google_embedding_function import ( + GoogleGenerativeAiEmbeddingFunction, + GoogleVertexEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.huggingface_embedding_function import ( + HuggingFaceEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.instructor_embedding_function import ( + InstructorEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.jina_embedding_function import ( + JinaEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.ollama_embedding_function import ( + OllamaEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.onnx_mini_lm_l6_v2 import ONNXMiniLM_L6_V2 + from chromadb.utils.embedding_functions.open_clip_embedding_function import ( + OpenCLIPEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.openai_embedding_function import ( + OpenAIEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.roboflow_embedding_function import ( + RoboflowEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import ( + SentenceTransformerEmbeddingFunction, + ) + from chromadb.utils.embedding_functions.text2vec_embedding_function import ( + Text2VecEmbeddingFunction, + ) + + from crewai.rag.embeddings.providers.aws.types import BedrockProviderSpec + from crewai.rag.embeddings.providers.cohere.types import CohereProviderSpec + from crewai.rag.embeddings.providers.custom.types import CustomProviderSpec + from crewai.rag.embeddings.providers.google.types import ( + GenerativeAiProviderSpec, + VertexAIProviderSpec, + ) + from crewai.rag.embeddings.providers.huggingface.types import ( + HuggingFaceProviderSpec, + ) + from 
crewai.rag.embeddings.providers.ibm.embedding_callable import ( + WatsonEmbeddingFunction, + ) + from crewai.rag.embeddings.providers.ibm.types import WatsonProviderSpec + from crewai.rag.embeddings.providers.instructor.types import InstructorProviderSpec + from crewai.rag.embeddings.providers.jina.types import JinaProviderSpec + from crewai.rag.embeddings.providers.microsoft.types import AzureProviderSpec + from crewai.rag.embeddings.providers.ollama.types import OllamaProviderSpec + from crewai.rag.embeddings.providers.onnx.types import ONNXProviderSpec + from crewai.rag.embeddings.providers.openai.types import OpenAIProviderSpec + from crewai.rag.embeddings.providers.openclip.types import OpenCLIPProviderSpec + from crewai.rag.embeddings.providers.roboflow.types import RoboflowProviderSpec + from crewai.rag.embeddings.providers.sentence_transformer.types import ( + SentenceTransformerProviderSpec, + ) + from crewai.rag.embeddings.providers.text2vec.types import Text2VecProviderSpec + from crewai.rag.embeddings.providers.voyageai.embedding_callable import ( + VoyageAIEmbeddingFunction, + ) + from crewai.rag.embeddings.providers.voyageai.types import VoyageAIProviderSpec + +T = TypeVar("T", bound=EmbeddingFunction) -class EmbedderConfig(TypedDict): - """Configuration for embedding functions with nested format.""" - - provider: AllowedEmbeddingProviders - config: NotRequired[dict[str, Any]] - - -EMBEDDING_PROVIDERS: Final[ - dict[AllowedEmbeddingProviders, Callable[..., EmbeddingFunction]] -] = { - "openai": OpenAIEmbeddingFunction, - "cohere": CohereEmbeddingFunction, - "ollama": OllamaEmbeddingFunction, - "huggingface": HuggingFaceEmbeddingFunction, - "sentence-transformer": SentenceTransformerEmbeddingFunction, - "instructor": InstructorEmbeddingFunction, - "google-palm": GooglePalmEmbeddingFunction, - "google-generativeai": GoogleGenerativeAiEmbeddingFunction, - "google-vertex": GoogleVertexEmbeddingFunction, - "amazon-bedrock": AmazonBedrockEmbeddingFunction, - "jina": JinaEmbeddingFunction, - "roboflow": RoboflowEmbeddingFunction, - "openclip": OpenCLIPEmbeddingFunction, - "text2vec": Text2VecEmbeddingFunction, - "onnx": ONNXMiniLM_L6_V2, -} - -PROVIDER_ENV_MAPPING: Final[dict[AllowedEmbeddingProviders, tuple[str, str]]] = { - "openai": ("OPENAI_API_KEY", "api_key"), - "cohere": ("COHERE_API_KEY", "api_key"), - "huggingface": ("HUGGINGFACE_API_KEY", "api_key"), - "google-palm": ("GOOGLE_API_KEY", "api_key"), - "google-generativeai": ("GOOGLE_API_KEY", "api_key"), - "google-vertex": ("GOOGLE_API_KEY", "api_key"), - "jina": ("JINA_API_KEY", "api_key"), - "roboflow": ("ROBOFLOW_API_KEY", "api_key"), +PROVIDER_PATHS = { + "azure": "crewai.rag.embeddings.providers.microsoft.azure.AzureProvider", + "amazon-bedrock": "crewai.rag.embeddings.providers.aws.bedrock.BedrockProvider", + "cohere": "crewai.rag.embeddings.providers.cohere.cohere_provider.CohereProvider", + "custom": "crewai.rag.embeddings.providers.custom.custom_provider.CustomProvider", + "google-generativeai": "crewai.rag.embeddings.providers.google.generative_ai.GenerativeAiProvider", + "google-vertex": "crewai.rag.embeddings.providers.google.vertex.VertexAIProvider", + "huggingface": "crewai.rag.embeddings.providers.huggingface.huggingface_provider.HuggingFaceProvider", + "instructor": "crewai.rag.embeddings.providers.instructor.instructor_provider.InstructorProvider", + "jina": "crewai.rag.embeddings.providers.jina.jina_provider.JinaProvider", + "ollama": "crewai.rag.embeddings.providers.ollama.ollama_provider.OllamaProvider", 
+ "onnx": "crewai.rag.embeddings.providers.onnx.onnx_provider.ONNXProvider", + "openai": "crewai.rag.embeddings.providers.openai.openai_provider.OpenAIProvider", + "openclip": "crewai.rag.embeddings.providers.openclip.openclip_provider.OpenCLIPProvider", + "roboflow": "crewai.rag.embeddings.providers.roboflow.roboflow_provider.RoboflowProvider", + "sentence-transformer": "crewai.rag.embeddings.providers.sentence_transformer.sentence_transformer_provider.SentenceTransformerProvider", + "text2vec": "crewai.rag.embeddings.providers.text2vec.text2vec_provider.Text2VecProvider", + "voyageai": "crewai.rag.embeddings.providers.voyageai.voyageai_provider.VoyageAIProvider", + "watson": "crewai.rag.embeddings.providers.ibm.watson.WatsonProvider", } -def _inject_api_key_from_env( - provider: AllowedEmbeddingProviders, config_dict: MutableMapping[str, Any] -) -> None: - """Inject API key or other required configuration from environment if not explicitly provided. +def build_embedder_from_provider(provider: BaseEmbeddingsProvider[T]) -> T: + """Build an embedding function instance from a provider. Args: - provider: The embedding provider name - config_dict: The configuration dictionary to modify in-place - - Raises: - ImportError: If required libraries for certain providers are not installed - ValueError: If AWS session creation fails for amazon-bedrock - """ - if provider in PROVIDER_ENV_MAPPING: - env_var_name, config_key = PROVIDER_ENV_MAPPING[provider] - if config_key not in config_dict: - env_value = os.getenv(env_var_name) - if env_value: - config_dict[config_key] = env_value - - if provider == "amazon-bedrock": - if "session" not in config_dict: - try: - import boto3 # type: ignore[import] - - config_dict["session"] = boto3.Session() - except ImportError as e: - raise ImportError( - "boto3 is required for amazon-bedrock embeddings. " - "Install it with: uv add boto3" - ) from e - except Exception as e: - raise ValueError( - f"Failed to create AWS session for amazon-bedrock. " - f"Ensure AWS credentials are configured. Error: {e}" - ) from e - - -def get_embedding_function( - config: EmbeddingOptions | EmbedderConfig | None = None, -) -> EmbeddingFunction: - """Get embedding function - delegates to ChromaDB. - - Args: - config: Optional configuration - either: - - EmbeddingOptions: Pydantic model with flat configuration - - EmbedderConfig: TypedDict with nested format {"provider": str, "config": dict} - - None: Uses default OpenAI configuration + provider: The embedding provider configuration. Returns: - EmbeddingFunction instance ready for use with ChromaDB + An instance of the specified embedding function type. 
+ """ + return provider.embedding_callable( + **provider.model_dump(exclude={"embedding_callable"}) + ) - Supported providers: - - openai: OpenAI embeddings - - cohere: Cohere embeddings - - ollama: Ollama local embeddings - - huggingface: HuggingFace embeddings - - sentence-transformer: Local sentence transformers - - instructor: Instructor embeddings for specialized tasks - - google-palm: Google PaLM embeddings - - google-generativeai: Google Generative AI embeddings - - google-vertex: Google Vertex AI embeddings - - amazon-bedrock: AWS Bedrock embeddings - - jina: Jina AI embeddings - - roboflow: Roboflow embeddings for vision tasks - - openclip: OpenCLIP embeddings for multimodal tasks - - text2vec: Text2Vec embeddings - - onnx: ONNX MiniLM-L6-v2 (no API key needed, included with ChromaDB) + +@overload +def build_embedder_from_dict(spec: AzureProviderSpec) -> OpenAIEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: BedrockProviderSpec, +) -> AmazonBedrockEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict(spec: CohereProviderSpec) -> CohereEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict(spec: CustomProviderSpec) -> EmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: GenerativeAiProviderSpec, +) -> GoogleGenerativeAiEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: HuggingFaceProviderSpec, +) -> HuggingFaceEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict(spec: OllamaProviderSpec) -> OllamaEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict(spec: OpenAIProviderSpec) -> OpenAIEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: VertexAIProviderSpec, +) -> GoogleVertexEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: VoyageAIProviderSpec, +) -> VoyageAIEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict(spec: WatsonProviderSpec) -> WatsonEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: SentenceTransformerProviderSpec, +) -> SentenceTransformerEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: InstructorProviderSpec, +) -> InstructorEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict(spec: JinaProviderSpec) -> JinaEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: RoboflowProviderSpec, +) -> RoboflowEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: OpenCLIPProviderSpec, +) -> OpenCLIPEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict( + spec: Text2VecProviderSpec, +) -> Text2VecEmbeddingFunction: ... + + +@overload +def build_embedder_from_dict(spec: ONNXProviderSpec) -> ONNXMiniLM_L6_V2: ... + + +def build_embedder_from_dict(spec): + """Build an embedding function instance from a dictionary specification. + + Args: + spec: A dictionary with 'provider' and 'config' keys. + Example: { + "provider": "openai", + "config": { + "api_key": "sk-...", + "model_name": "text-embedding-3-small" + } + } + + Returns: + An instance of the appropriate embedding function. + + Raises: + ValueError: If the provider is not recognized. + """ + provider_name = spec["provider"] + if not provider_name: + raise ValueError("Missing 'provider' key in specification") + + if provider_name not in PROVIDER_PATHS: + raise ValueError( + f"Unknown provider: {provider_name}. 
Available providers: {list(PROVIDER_PATHS.keys())}" + ) + + provider_path = PROVIDER_PATHS[provider_name] + try: + provider_class = import_and_validate_definition(provider_path) + except (ImportError, AttributeError, ValueError) as e: + raise ImportError(f"Failed to import provider {provider_name}: {e}") from e + + provider_config = spec.get("config", {}) + + if provider_name == "custom" and "embedding_callable" not in provider_config: + raise ValueError("Custom provider requires 'embedding_callable' in config") + + provider = provider_class(**provider_config) + return build_embedder_from_provider(provider) + + +@overload +def build_embedder(spec: BaseEmbeddingsProvider[T]) -> T: ... + + +@overload +def build_embedder(spec: AzureProviderSpec) -> OpenAIEmbeddingFunction: ... + + +@overload +def build_embedder(spec: BedrockProviderSpec) -> AmazonBedrockEmbeddingFunction: ... + + +@overload +def build_embedder(spec: CohereProviderSpec) -> CohereEmbeddingFunction: ... + + +@overload +def build_embedder(spec: CustomProviderSpec) -> EmbeddingFunction: ... + + +@overload +def build_embedder( + spec: GenerativeAiProviderSpec, +) -> GoogleGenerativeAiEmbeddingFunction: ... + + +@overload +def build_embedder(spec: HuggingFaceProviderSpec) -> HuggingFaceEmbeddingFunction: ... + + +@overload +def build_embedder(spec: OllamaProviderSpec) -> OllamaEmbeddingFunction: ... + + +@overload +def build_embedder(spec: OpenAIProviderSpec) -> OpenAIEmbeddingFunction: ... + + +@overload +def build_embedder(spec: VertexAIProviderSpec) -> GoogleVertexEmbeddingFunction: ... + + +@overload +def build_embedder(spec: VoyageAIProviderSpec) -> VoyageAIEmbeddingFunction: ... + + +@overload +def build_embedder(spec: WatsonProviderSpec) -> WatsonEmbeddingFunction: ... + + +@overload +def build_embedder( + spec: SentenceTransformerProviderSpec, +) -> SentenceTransformerEmbeddingFunction: ... + + +@overload +def build_embedder(spec: InstructorProviderSpec) -> InstructorEmbeddingFunction: ... + + +@overload +def build_embedder(spec: JinaProviderSpec) -> JinaEmbeddingFunction: ... + + +@overload +def build_embedder(spec: RoboflowProviderSpec) -> RoboflowEmbeddingFunction: ... + + +@overload +def build_embedder(spec: OpenCLIPProviderSpec) -> OpenCLIPEmbeddingFunction: ... + + +@overload +def build_embedder(spec: Text2VecProviderSpec) -> Text2VecEmbeddingFunction: ... + + +@overload +def build_embedder(spec: ONNXProviderSpec) -> ONNXMiniLM_L6_V2: ... + + +def build_embedder(spec): + """Build an embedding function from either a provider spec or a provider instance. + + Args: + spec: Either a provider specification dictionary or a provider instance. + + Returns: + An embedding function instance. If a typed provider is passed, returns + the specific embedding function type. Examples: - # Use default OpenAI embedding - >>> embedder = get_embedding_function() + # From dictionary specification + embedder = build_embedder({ + "provider": "openai", + "config": {"api_key": "sk-..."} + }) - # Use Cohere with dict - >>> embedder = get_embedding_function(EmbedderConfig(**{ - ... "provider": "cohere", - ... "config": { - ... "api_key": "your-key", - ... "model_name": "embed-english-v3.0" - ... } - ... })) - - # Use with EmbeddingOptions - >>> embedder = get_embedding_function( - ... EmbeddingOptions(provider="sentence-transformer", model_name="all-MiniLM-L6-v2") - ... ) - - # Use Azure OpenAI - >>> embedder = get_embedding_function(EmbedderConfig(**{ - ... "provider": "openai", - ... "config": { - ... "api_key": "your-azure-key", - ... 
"api_base": "https://your-resource.openai.azure.com/", - ... "api_type": "azure", - ... "api_version": "2023-05-15", - ... "model": "text-embedding-3-small", - ... "deployment_id": "your-deployment-name" - ... } - ... }) - - >>> embedder = get_embedding_function(EmbedderConfig(**{ - ... "provider": "onnx" - ... }) + # From provider instance + provider = OpenAIProvider(api_key="sk-...") + embedder = build_embedder(provider) """ - if config is None: - return OpenAIEmbeddingFunction( - api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" - ) + if isinstance(spec, BaseEmbeddingsProvider): + return build_embedder_from_provider(spec) + return build_embedder_from_dict(spec) - provider: AllowedEmbeddingProviders - config_dict: dict[str, Any] - if isinstance(config, EmbeddingOptions): - config_dict = config.model_dump(exclude_none=True) - provider = config_dict["provider"] - else: - provider = config["provider"] - nested: dict[str, Any] = config.get("config", {}) - - if not nested and len(config) > 1: - raise ValueError( - "Invalid embedder configuration format. " - "Configuration must be nested under a 'config' key. " - "Example: {'provider': 'openai', 'config': {'api_key': '...', 'model': '...'}}" - ) - - config_dict = dict(nested) - if "model" in config_dict and "model_name" not in config_dict: - config_dict["model_name"] = config_dict.pop("model") - - if provider not in EMBEDDING_PROVIDERS: - raise ValueError( - f"Unsupported provider: {provider}. " - f"Available providers: {list(EMBEDDING_PROVIDERS.keys())}" - ) - - _inject_api_key_from_env(provider, config_dict) - - config_dict.pop("batch_size", None) - - return EMBEDDING_PROVIDERS[provider](**config_dict) +# Backward compatibility alias +get_embedding_function = build_embedder diff --git a/src/crewai/rag/embeddings/providers/__init__.py b/src/crewai/rag/embeddings/providers/__init__.py new file mode 100644 index 000000000..74d87c83f --- /dev/null +++ b/src/crewai/rag/embeddings/providers/__init__.py @@ -0,0 +1 @@ +"""Embedding provider implementations.""" diff --git a/src/crewai/rag/embeddings/providers/aws/__init__.py b/src/crewai/rag/embeddings/providers/aws/__init__.py new file mode 100644 index 000000000..861bfd254 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/aws/__init__.py @@ -0,0 +1,13 @@ +"""AWS embedding providers.""" + +from crewai.rag.embeddings.providers.aws.bedrock import BedrockProvider +from crewai.rag.embeddings.providers.aws.types import ( + BedrockProviderConfig, + BedrockProviderSpec, +) + +__all__ = [ + "BedrockProvider", + "BedrockProviderConfig", + "BedrockProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/aws/bedrock.py b/src/crewai/rag/embeddings/providers/aws/bedrock.py new file mode 100644 index 000000000..9319b8c82 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/aws/bedrock.py @@ -0,0 +1,58 @@ +"""Amazon Bedrock embeddings provider.""" + +from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import ( + AmazonBedrockEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + +try: + from boto3.session import Session # type: ignore[import-untyped] +except ImportError as exc: + raise ImportError( + "boto3 is required for amazon-bedrock embeddings. Install it with: uv add boto3" + ) from exc + + +def create_aws_session() -> Session: + """Create an AWS session for Bedrock. 
+ + Returns: + boto3.Session: AWS session object + + Raises: + ImportError: If boto3 is not installed + ValueError: If AWS session creation fails + """ + try: + import boto3 # type: ignore[import] + + return boto3.Session() + except ImportError as e: + raise ImportError( + "boto3 is required for amazon-bedrock embeddings. " + "Install it with: uv add boto3" + ) from e + except Exception as e: + raise ValueError( + f"Failed to create AWS session for amazon-bedrock. " + f"Ensure AWS credentials are configured. Error: {e}" + ) from e + + +class BedrockProvider(BaseEmbeddingsProvider[AmazonBedrockEmbeddingFunction]): + """Amazon Bedrock embeddings provider.""" + + embedding_callable: type[AmazonBedrockEmbeddingFunction] = Field( + default=AmazonBedrockEmbeddingFunction, + description="Amazon Bedrock embedding function class", + ) + model_name: str = Field( + default="amazon.titan-embed-text-v1", + description="Model name to use for embeddings", + validation_alias="BEDROCK_MODEL_NAME", + ) + session: Session = Field( + default_factory=create_aws_session, description="AWS session object" + ) diff --git a/src/crewai/rag/embeddings/providers/aws/types.py b/src/crewai/rag/embeddings/providers/aws/types.py new file mode 100644 index 000000000..c2f80f095 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/aws/types.py @@ -0,0 +1,17 @@ +"""Type definitions for AWS embedding providers.""" + +from typing import Annotated, Any, Literal, TypedDict + + +class BedrockProviderConfig(TypedDict, total=False): + """Configuration for Bedrock provider.""" + + model_name: Annotated[str, "amazon.titan-embed-text-v1"] + session: Any + + +class BedrockProviderSpec(TypedDict): + """Bedrock provider specification.""" + + provider: Literal["amazon-bedrock"] + config: BedrockProviderConfig diff --git a/src/crewai/rag/embeddings/providers/cohere/__init__.py b/src/crewai/rag/embeddings/providers/cohere/__init__.py new file mode 100644 index 000000000..16c517147 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/cohere/__init__.py @@ -0,0 +1,13 @@ +"""Cohere embedding providers.""" + +from crewai.rag.embeddings.providers.cohere.cohere_provider import CohereProvider +from crewai.rag.embeddings.providers.cohere.types import ( + CohereProviderConfig, + CohereProviderSpec, +) + +__all__ = [ + "CohereProvider", + "CohereProviderConfig", + "CohereProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/cohere/cohere_provider.py b/src/crewai/rag/embeddings/providers/cohere/cohere_provider.py new file mode 100644 index 000000000..d0d64a72f --- /dev/null +++ b/src/crewai/rag/embeddings/providers/cohere/cohere_provider.py @@ -0,0 +1,24 @@ +"""Cohere embeddings provider.""" + +from chromadb.utils.embedding_functions.cohere_embedding_function import ( + CohereEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class CohereProvider(BaseEmbeddingsProvider[CohereEmbeddingFunction]): + """Cohere embeddings provider.""" + + embedding_callable: type[CohereEmbeddingFunction] = Field( + default=CohereEmbeddingFunction, description="Cohere embedding function class" + ) + api_key: str = Field( + description="Cohere API key", validation_alias="COHERE_API_KEY" + ) + model_name: str = Field( + default="large", + description="Model name to use for embeddings", + validation_alias="COHERE_MODEL_NAME", + ) diff --git a/src/crewai/rag/embeddings/providers/cohere/types.py b/src/crewai/rag/embeddings/providers/cohere/types.py new file mode 100644 index 
000000000..77947e787 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/cohere/types.py @@ -0,0 +1,17 @@ +"""Type definitions for Cohere embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class CohereProviderConfig(TypedDict, total=False): + """Configuration for Cohere provider.""" + + api_key: str + model_name: Annotated[str, "large"] + + +class CohereProviderSpec(TypedDict): + """Cohere provider specification.""" + + provider: Literal["cohere"] + config: CohereProviderConfig diff --git a/src/crewai/rag/embeddings/providers/custom/__init__.py b/src/crewai/rag/embeddings/providers/custom/__init__.py new file mode 100644 index 000000000..f6f08747d --- /dev/null +++ b/src/crewai/rag/embeddings/providers/custom/__init__.py @@ -0,0 +1,13 @@ +"""Custom embedding providers.""" + +from crewai.rag.embeddings.providers.custom.custom_provider import CustomProvider +from crewai.rag.embeddings.providers.custom.types import ( + CustomProviderConfig, + CustomProviderSpec, +) + +__all__ = [ + "CustomProvider", + "CustomProviderConfig", + "CustomProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/custom/custom_provider.py b/src/crewai/rag/embeddings/providers/custom/custom_provider.py new file mode 100644 index 000000000..8f71446bf --- /dev/null +++ b/src/crewai/rag/embeddings/providers/custom/custom_provider.py @@ -0,0 +1,19 @@ +"""Custom embeddings provider for user-defined embedding functions.""" + +from pydantic import Field +from pydantic_settings import SettingsConfigDict + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider +from crewai.rag.embeddings.providers.custom.embedding_callable import ( + CustomEmbeddingFunction, +) + + +class CustomProvider(BaseEmbeddingsProvider[CustomEmbeddingFunction]): + """Custom embeddings provider for user-defined embedding functions.""" + + embedding_callable: type[CustomEmbeddingFunction] = Field( + ..., description="Custom embedding function class" + ) + + model_config = SettingsConfigDict(extra="allow") diff --git a/src/crewai/rag/embeddings/providers/custom/embedding_callable.py b/src/crewai/rag/embeddings/providers/custom/embedding_callable.py new file mode 100644 index 000000000..e74129727 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/custom/embedding_callable.py @@ -0,0 +1,22 @@ +"""Custom embedding function base implementation.""" + +from crewai.rag.core.base_embeddings_callable import EmbeddingFunction +from crewai.rag.core.types import Documents, Embeddings + + +class CustomEmbeddingFunction(EmbeddingFunction[Documents]): + """Base class for custom embedding functions. + + This provides a concrete implementation that can be subclassed for custom embeddings. + """ + + def __call__(self, input: Documents) -> Embeddings: + """Convert input documents to embeddings. + + Args: + input: List of documents to embed. + + Returns: + List of numpy arrays representing the embeddings. 
+ """ + raise NotImplementedError("Subclasses must implement __call__ method") diff --git a/src/crewai/rag/embeddings/providers/custom/types.py b/src/crewai/rag/embeddings/providers/custom/types.py new file mode 100644 index 000000000..b7e05dc92 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/custom/types.py @@ -0,0 +1,18 @@ +"""Type definitions for custom embedding providers.""" + +from typing import Literal, TypedDict + +from chromadb.api.types import EmbeddingFunction + + +class CustomProviderConfig(TypedDict, total=False): + """Configuration for Custom provider.""" + + embedding_callable: type[EmbeddingFunction] + + +class CustomProviderSpec(TypedDict): + """Custom provider specification.""" + + provider: Literal["custom"] + config: CustomProviderConfig diff --git a/src/crewai/rag/embeddings/providers/google/__init__.py b/src/crewai/rag/embeddings/providers/google/__init__.py new file mode 100644 index 000000000..1aae2bf7e --- /dev/null +++ b/src/crewai/rag/embeddings/providers/google/__init__.py @@ -0,0 +1,23 @@ +"""Google embedding providers.""" + +from crewai.rag.embeddings.providers.google.generative_ai import ( + GenerativeAiProvider, +) +from crewai.rag.embeddings.providers.google.types import ( + GenerativeAiProviderConfig, + GenerativeAiProviderSpec, + VertexAIProviderConfig, + VertexAIProviderSpec, +) +from crewai.rag.embeddings.providers.google.vertex import ( + VertexAIProvider, +) + +__all__ = [ + "GenerativeAiProvider", + "GenerativeAiProviderConfig", + "GenerativeAiProviderSpec", + "VertexAIProvider", + "VertexAIProviderConfig", + "VertexAIProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/google/generative_ai.py b/src/crewai/rag/embeddings/providers/google/generative_ai.py new file mode 100644 index 000000000..503d32118 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/google/generative_ai.py @@ -0,0 +1,30 @@ +"""Google Generative AI embeddings provider.""" + +from chromadb.utils.embedding_functions.google_embedding_function import ( + GoogleGenerativeAiEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class GenerativeAiProvider(BaseEmbeddingsProvider[GoogleGenerativeAiEmbeddingFunction]): + """Google Generative AI embeddings provider.""" + + embedding_callable: type[GoogleGenerativeAiEmbeddingFunction] = Field( + default=GoogleGenerativeAiEmbeddingFunction, + description="Google Generative AI embedding function class", + ) + model_name: str = Field( + default="models/embedding-001", + description="Model name to use for embeddings", + validation_alias="GOOGLE_GENERATIVE_AI_MODEL_NAME", + ) + api_key: str = Field( + description="Google API key", validation_alias="GOOGLE_API_KEY" + ) + task_type: str = Field( + default="RETRIEVAL_DOCUMENT", + description="Task type for embeddings", + validation_alias="GOOGLE_GENERATIVE_AI_TASK_TYPE", + ) diff --git a/src/crewai/rag/embeddings/providers/google/types.py b/src/crewai/rag/embeddings/providers/google/types.py new file mode 100644 index 000000000..8f82483ef --- /dev/null +++ b/src/crewai/rag/embeddings/providers/google/types.py @@ -0,0 +1,34 @@ +"""Type definitions for Google embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class GenerativeAiProviderConfig(TypedDict, total=False): + """Configuration for Google Generative AI provider.""" + + api_key: str + model_name: Annotated[str, "models/embedding-001"] + task_type: Annotated[str, "RETRIEVAL_DOCUMENT"] + + +class 
GenerativeAiProviderSpec(TypedDict): + """Google Generative AI provider specification.""" + + provider: Literal["google-generativeai"] + config: GenerativeAiProviderConfig + + +class VertexAIProviderConfig(TypedDict, total=False): + """Configuration for Vertex AI provider.""" + + api_key: str + model_name: Annotated[str, "textembedding-gecko"] + project_id: Annotated[str, "cloud-large-language-models"] + region: Annotated[str, "us-central1"] + + +class VertexAIProviderSpec(TypedDict): + """Vertex AI provider specification.""" + + provider: Literal["google-vertex"] + config: VertexAIProviderConfig diff --git a/src/crewai/rag/embeddings/providers/google/vertex.py b/src/crewai/rag/embeddings/providers/google/vertex.py new file mode 100644 index 000000000..9260fe5dc --- /dev/null +++ b/src/crewai/rag/embeddings/providers/google/vertex.py @@ -0,0 +1,35 @@ +"""Google Vertex AI embeddings provider.""" + +from chromadb.utils.embedding_functions.google_embedding_function import ( + GoogleVertexEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class VertexAIProvider(BaseEmbeddingsProvider[GoogleVertexEmbeddingFunction]): + """Google Vertex AI embeddings provider.""" + + embedding_callable: type[GoogleVertexEmbeddingFunction] = Field( + default=GoogleVertexEmbeddingFunction, + description="Vertex AI embedding function class", + ) + model_name: str = Field( + default="textembedding-gecko", + description="Model name to use for embeddings", + validation_alias="GOOGLE_VERTEX_MODEL_NAME", + ) + api_key: str = Field( + description="Google API key", validation_alias="GOOGLE_CLOUD_API_KEY" + ) + project_id: str = Field( + default="cloud-large-language-models", + description="GCP project ID", + validation_alias="GOOGLE_CLOUD_PROJECT", + ) + region: str = Field( + default="us-central1", + description="GCP region", + validation_alias="GOOGLE_CLOUD_REGION", + ) diff --git a/src/crewai/rag/embeddings/providers/huggingface/__init__.py b/src/crewai/rag/embeddings/providers/huggingface/__init__.py new file mode 100644 index 000000000..e52295602 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/huggingface/__init__.py @@ -0,0 +1,15 @@ +"""HuggingFace embedding providers.""" + +from crewai.rag.embeddings.providers.huggingface.huggingface_provider import ( + HuggingFaceProvider, +) +from crewai.rag.embeddings.providers.huggingface.types import ( + HuggingFaceProviderConfig, + HuggingFaceProviderSpec, +) + +__all__ = [ + "HuggingFaceProvider", + "HuggingFaceProviderConfig", + "HuggingFaceProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/huggingface/huggingface_provider.py b/src/crewai/rag/embeddings/providers/huggingface/huggingface_provider.py new file mode 100644 index 000000000..c2aa97582 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/huggingface/huggingface_provider.py @@ -0,0 +1,20 @@ +"""HuggingFace embeddings provider.""" + +from chromadb.utils.embedding_functions.huggingface_embedding_function import ( + HuggingFaceEmbeddingServer, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class HuggingFaceProvider(BaseEmbeddingsProvider[HuggingFaceEmbeddingServer]): + """HuggingFace embeddings provider.""" + + embedding_callable: type[HuggingFaceEmbeddingServer] = Field( + default=HuggingFaceEmbeddingServer, + description="HuggingFace embedding function class", + ) + url: str = Field( + description="HuggingFace API URL", 
validation_alias="HUGGINGFACE_URL" + ) diff --git a/src/crewai/rag/embeddings/providers/huggingface/types.py b/src/crewai/rag/embeddings/providers/huggingface/types.py new file mode 100644 index 000000000..9735e8d8b --- /dev/null +++ b/src/crewai/rag/embeddings/providers/huggingface/types.py @@ -0,0 +1,16 @@ +"""Type definitions for HuggingFace embedding providers.""" + +from typing import Literal, TypedDict + + +class HuggingFaceProviderConfig(TypedDict, total=False): + """Configuration for HuggingFace provider.""" + + url: str + + +class HuggingFaceProviderSpec(TypedDict): + """HuggingFace provider specification.""" + + provider: Literal["huggingface"] + config: HuggingFaceProviderConfig diff --git a/src/crewai/rag/embeddings/providers/ibm/__init__.py b/src/crewai/rag/embeddings/providers/ibm/__init__.py new file mode 100644 index 000000000..987bf9b01 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/ibm/__init__.py @@ -0,0 +1,15 @@ +"""IBM embedding providers.""" + +from crewai.rag.embeddings.providers.ibm.types import ( + WatsonProviderConfig, + WatsonProviderSpec, +) +from crewai.rag.embeddings.providers.ibm.watson import ( + WatsonProvider, +) + +__all__ = [ + "WatsonProvider", + "WatsonProviderConfig", + "WatsonProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/ibm/embedding_callable.py b/src/crewai/rag/embeddings/providers/ibm/embedding_callable.py new file mode 100644 index 000000000..ef983f62d --- /dev/null +++ b/src/crewai/rag/embeddings/providers/ibm/embedding_callable.py @@ -0,0 +1,144 @@ +"""IBM Watson embedding function implementation.""" + +from typing import cast + +import ibm_watsonx_ai.foundation_models as watson_models # type: ignore[import-not-found, import-untyped] +from ibm_watsonx_ai import Credentials # type: ignore[import-not-found, import-untyped] +from ibm_watsonx_ai.metanames import ( # type: ignore[import-not-found, import-untyped] + EmbedTextParamsMetaNames as EmbedParams, +) +from typing_extensions import Unpack + +from crewai.rag.core.base_embeddings_callable import EmbeddingFunction +from crewai.rag.core.types import Documents, Embeddings +from crewai.rag.embeddings.providers.ibm.types import WatsonProviderConfig + + +class WatsonEmbeddingFunction(EmbeddingFunction[Documents]): + """Embedding function for IBM Watson models.""" + + def __init__(self, **kwargs: Unpack[WatsonProviderConfig]) -> None: + """Initialize Watson embedding function. + + Args: + **kwargs: Configuration parameters for Watson Embeddings and Credentials. + """ + self._config = kwargs + + def __call__(self, input: Documents) -> Embeddings: + """Generate embeddings for input documents. + + Args: + input: List of documents to embed. + + Returns: + List of embedding vectors. 
+ """ + if isinstance(input, str): + input = [input] + + embeddings_config: dict = { + "model_id": self._config["model_id"], + } + if "params" in self._config and self._config["params"] is not None: + embeddings_config["params"] = self._config["params"] + if "project_id" in self._config and self._config["project_id"] is not None: + embeddings_config["project_id"] = self._config["project_id"] + if "space_id" in self._config and self._config["space_id"] is not None: + embeddings_config["space_id"] = self._config["space_id"] + if "api_client" in self._config and self._config["api_client"] is not None: + embeddings_config["api_client"] = self._config["api_client"] + if "verify" in self._config and self._config["verify"] is not None: + embeddings_config["verify"] = self._config["verify"] + if "persistent_connection" in self._config: + embeddings_config["persistent_connection"] = self._config[ + "persistent_connection" + ] + if "batch_size" in self._config: + embeddings_config["batch_size"] = self._config["batch_size"] + if "concurrency_limit" in self._config: + embeddings_config["concurrency_limit"] = self._config["concurrency_limit"] + if "max_retries" in self._config and self._config["max_retries"] is not None: + embeddings_config["max_retries"] = self._config["max_retries"] + if "delay_time" in self._config and self._config["delay_time"] is not None: + embeddings_config["delay_time"] = self._config["delay_time"] + if ( + "retry_status_codes" in self._config + and self._config["retry_status_codes"] is not None + ): + embeddings_config["retry_status_codes"] = self._config["retry_status_codes"] + + if "credentials" in self._config and self._config["credentials"] is not None: + embeddings_config["credentials"] = self._config["credentials"] + else: + cred_config: dict = {} + if "url" in self._config and self._config["url"] is not None: + cred_config["url"] = self._config["url"] + if "api_key" in self._config and self._config["api_key"] is not None: + cred_config["api_key"] = self._config["api_key"] + if "name" in self._config and self._config["name"] is not None: + cred_config["name"] = self._config["name"] + if ( + "iam_serviceid_crn" in self._config + and self._config["iam_serviceid_crn"] is not None + ): + cred_config["iam_serviceid_crn"] = self._config["iam_serviceid_crn"] + if ( + "trusted_profile_id" in self._config + and self._config["trusted_profile_id"] is not None + ): + cred_config["trusted_profile_id"] = self._config["trusted_profile_id"] + if "token" in self._config and self._config["token"] is not None: + cred_config["token"] = self._config["token"] + if ( + "projects_token" in self._config + and self._config["projects_token"] is not None + ): + cred_config["projects_token"] = self._config["projects_token"] + if "username" in self._config and self._config["username"] is not None: + cred_config["username"] = self._config["username"] + if "password" in self._config and self._config["password"] is not None: + cred_config["password"] = self._config["password"] + if ( + "instance_id" in self._config + and self._config["instance_id"] is not None + ): + cred_config["instance_id"] = self._config["instance_id"] + if "version" in self._config and self._config["version"] is not None: + cred_config["version"] = self._config["version"] + if ( + "bedrock_url" in self._config + and self._config["bedrock_url"] is not None + ): + cred_config["bedrock_url"] = self._config["bedrock_url"] + if ( + "platform_url" in self._config + and self._config["platform_url"] is not None + ): + 
cred_config["platform_url"] = self._config["platform_url"] + if "proxies" in self._config and self._config["proxies"] is not None: + cred_config["proxies"] = self._config["proxies"] + if ( + "verify" not in embeddings_config + and "verify" in self._config + and self._config["verify"] is not None + ): + cred_config["verify"] = self._config["verify"] + + if cred_config: + embeddings_config["credentials"] = Credentials(**cred_config) + + if "params" not in embeddings_config: + embeddings_config["params"] = { + EmbedParams.TRUNCATE_INPUT_TOKENS: 3, + EmbedParams.RETURN_OPTIONS: {"input_text": True}, + } + + embedding = watson_models.Embeddings(**embeddings_config) + + try: + embeddings = embedding.embed_documents(input) + return cast(Embeddings, embeddings) + except Exception as e: + print(f"Error during Watson embedding: {e}") + raise diff --git a/src/crewai/rag/embeddings/providers/ibm/types.py b/src/crewai/rag/embeddings/providers/ibm/types.py new file mode 100644 index 000000000..048ce5267 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/ibm/types.py @@ -0,0 +1,42 @@ +"""Type definitions for IBM Watson embedding providers.""" + +from typing import Annotated, Any, Literal, TypedDict + + +class WatsonProviderConfig(TypedDict, total=False): + """Configuration for Watson provider.""" + + model_id: str + url: str + params: dict[str, str | dict[str, str]] + credentials: Any + project_id: str + space_id: str + api_client: Any + verify: bool | str + persistent_connection: Annotated[bool, True] + batch_size: Annotated[int, 100] + concurrency_limit: Annotated[int, 10] + max_retries: int + delay_time: float + retry_status_codes: list[int] + api_key: str + name: str + iam_serviceid_crn: str + trusted_profile_id: str + token: str + projects_token: str + username: str + password: str + instance_id: str + version: str + bedrock_url: str + platform_url: str + proxies: dict + + +class WatsonProviderSpec(TypedDict): + """Watson provider specification.""" + + provider: Literal["watson"] + config: WatsonProviderConfig diff --git a/src/crewai/rag/embeddings/providers/ibm/watson.py b/src/crewai/rag/embeddings/providers/ibm/watson.py new file mode 100644 index 000000000..562c3923e --- /dev/null +++ b/src/crewai/rag/embeddings/providers/ibm/watson.py @@ -0,0 +1,126 @@ +"""IBM Watson embeddings provider.""" + +from ibm_watsonx_ai import ( # type: ignore[import-not-found,import-untyped] + APIClient, + Credentials, +) +from pydantic import Field, model_validator +from typing_extensions import Self + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider +from crewai.rag.embeddings.providers.ibm.embedding_callable import ( + WatsonEmbeddingFunction, +) + + +class WatsonProvider(BaseEmbeddingsProvider[WatsonEmbeddingFunction]): + """IBM Watson embeddings provider. + + Note: Requires custom implementation as Watson uses a different interface. 
+ """ + + embedding_callable: type[WatsonEmbeddingFunction] = Field( + default=WatsonEmbeddingFunction, description="Watson embedding function class" + ) + model_id: str = Field( + description="Watson model ID", validation_alias="WATSON_MODEL_ID" + ) + params: dict[str, str | dict[str, str]] | None = Field( + default=None, description="Additional parameters" + ) + credentials: Credentials | None = Field( + default=None, description="Watson credentials" + ) + project_id: str | None = Field( + default=None, + description="Watson project ID", + validation_alias="WATSON_PROJECT_ID", + ) + space_id: str | None = Field( + default=None, description="Watson space ID", validation_alias="WATSON_SPACE_ID" + ) + api_client: APIClient | None = Field(default=None, description="Watson API client") + verify: bool | str | None = Field( + default=None, description="SSL verification", validation_alias="WATSON_VERIFY" + ) + persistent_connection: bool = Field( + default=True, + description="Use persistent connection", + validation_alias="WATSON_PERSISTENT_CONNECTION", + ) + batch_size: int = Field( + default=100, + description="Batch size for processing", + validation_alias="WATSON_BATCH_SIZE", + ) + concurrency_limit: int = Field( + default=10, + description="Concurrency limit", + validation_alias="WATSON_CONCURRENCY_LIMIT", + ) + max_retries: int | None = Field( + default=None, + description="Maximum retries", + validation_alias="WATSON_MAX_RETRIES", + ) + delay_time: float | None = Field( + default=None, + description="Delay time between retries", + validation_alias="WATSON_DELAY_TIME", + ) + retry_status_codes: list[int] | None = Field( + default=None, description="HTTP status codes to retry on" + ) + url: str = Field(description="Watson API URL", validation_alias="WATSON_URL") + api_key: str = Field( + description="Watson API key", validation_alias="WATSON_API_KEY" + ) + name: str | None = Field( + default=None, description="Service name", validation_alias="WATSON_NAME" + ) + iam_serviceid_crn: str | None = Field( + default=None, + description="IAM service ID CRN", + validation_alias="WATSON_IAM_SERVICEID_CRN", + ) + trusted_profile_id: str | None = Field( + default=None, + description="Trusted profile ID", + validation_alias="WATSON_TRUSTED_PROFILE_ID", + ) + token: str | None = Field( + default=None, description="Bearer token", validation_alias="WATSON_TOKEN" + ) + projects_token: str | None = Field( + default=None, + description="Projects token", + validation_alias="WATSON_PROJECTS_TOKEN", + ) + username: str | None = Field( + default=None, description="Username", validation_alias="WATSON_USERNAME" + ) + password: str | None = Field( + default=None, description="Password", validation_alias="WATSON_PASSWORD" + ) + instance_id: str | None = Field( + default=None, + description="Service instance ID", + validation_alias="WATSON_INSTANCE_ID", + ) + version: str | None = Field( + default=None, description="API version", validation_alias="WATSON_VERSION" + ) + bedrock_url: str | None = Field( + default=None, description="Bedrock URL", validation_alias="WATSON_BEDROCK_URL" + ) + platform_url: str | None = Field( + default=None, description="Platform URL", validation_alias="WATSON_PLATFORM_URL" + ) + proxies: dict | None = Field(default=None, description="Proxy configuration") + + @model_validator(mode="after") + def validate_space_or_project(self) -> Self: + """Validate that either space_id or project_id is provided.""" + if not self.space_id and not self.project_id: + raise ValueError("One of 'space_id' or 
'project_id' must be provided") + return self diff --git a/src/crewai/rag/embeddings/providers/instructor/__init__.py b/src/crewai/rag/embeddings/providers/instructor/__init__.py new file mode 100644 index 000000000..987c797b0 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/instructor/__init__.py @@ -0,0 +1,15 @@ +"""Instructor embedding providers.""" + +from crewai.rag.embeddings.providers.instructor.instructor_provider import ( + InstructorProvider, +) +from crewai.rag.embeddings.providers.instructor.types import ( + InstructorProviderConfig, + InstructorProviderSpec, +) + +__all__ = [ + "InstructorProvider", + "InstructorProviderConfig", + "InstructorProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/instructor/instructor_provider.py b/src/crewai/rag/embeddings/providers/instructor/instructor_provider.py new file mode 100644 index 000000000..5c68f9c60 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/instructor/instructor_provider.py @@ -0,0 +1,32 @@ +"""Instructor embeddings provider.""" + +from chromadb.utils.embedding_functions.instructor_embedding_function import ( + InstructorEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class InstructorProvider(BaseEmbeddingsProvider[InstructorEmbeddingFunction]): + """Instructor embeddings provider.""" + + embedding_callable: type[InstructorEmbeddingFunction] = Field( + default=InstructorEmbeddingFunction, + description="Instructor embedding function class", + ) + model_name: str = Field( + default="hkunlp/instructor-base", + description="Model name to use", + validation_alias="INSTRUCTOR_MODEL_NAME", + ) + device: str = Field( + default="cpu", + description="Device to run model on (cpu or cuda)", + validation_alias="INSTRUCTOR_DEVICE", + ) + instruction: str | None = Field( + default=None, + description="Instruction for embeddings", + validation_alias="INSTRUCTOR_INSTRUCTION", + ) diff --git a/src/crewai/rag/embeddings/providers/instructor/types.py b/src/crewai/rag/embeddings/providers/instructor/types.py new file mode 100644 index 000000000..b75ee89a4 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/instructor/types.py @@ -0,0 +1,18 @@ +"""Type definitions for Instructor embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class InstructorProviderConfig(TypedDict, total=False): + """Configuration for Instructor provider.""" + + model_name: Annotated[str, "hkunlp/instructor-base"] + device: Annotated[str, "cpu"] + instruction: str + + +class InstructorProviderSpec(TypedDict): + """Instructor provider specification.""" + + provider: Literal["instructor"] + config: InstructorProviderConfig diff --git a/src/crewai/rag/embeddings/providers/jina/__init__.py b/src/crewai/rag/embeddings/providers/jina/__init__.py new file mode 100644 index 000000000..c01f633bb --- /dev/null +++ b/src/crewai/rag/embeddings/providers/jina/__init__.py @@ -0,0 +1,13 @@ +"""Jina embedding providers.""" + +from crewai.rag.embeddings.providers.jina.jina_provider import JinaProvider +from crewai.rag.embeddings.providers.jina.types import ( + JinaProviderConfig, + JinaProviderSpec, +) + +__all__ = [ + "JinaProvider", + "JinaProviderConfig", + "JinaProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/jina/jina_provider.py b/src/crewai/rag/embeddings/providers/jina/jina_provider.py new file mode 100644 index 000000000..8f9100784 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/jina/jina_provider.py @@ -0,0 
+1,22 @@ +"""Jina embeddings provider.""" + +from chromadb.utils.embedding_functions.jina_embedding_function import ( + JinaEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class JinaProvider(BaseEmbeddingsProvider[JinaEmbeddingFunction]): + """Jina embeddings provider.""" + + embedding_callable: type[JinaEmbeddingFunction] = Field( + default=JinaEmbeddingFunction, description="Jina embedding function class" + ) + api_key: str = Field(description="Jina API key", validation_alias="JINA_API_KEY") + model_name: str = Field( + default="jina-embeddings-v2-base-en", + description="Model name to use for embeddings", + validation_alias="JINA_MODEL_NAME", + ) diff --git a/src/crewai/rag/embeddings/providers/jina/types.py b/src/crewai/rag/embeddings/providers/jina/types.py new file mode 100644 index 000000000..2ebc1669e --- /dev/null +++ b/src/crewai/rag/embeddings/providers/jina/types.py @@ -0,0 +1,17 @@ +"""Type definitions for Jina embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class JinaProviderConfig(TypedDict, total=False): + """Configuration for Jina provider.""" + + api_key: str + model_name: Annotated[str, "jina-embeddings-v2-base-en"] + + +class JinaProviderSpec(TypedDict): + """Jina provider specification.""" + + provider: Literal["jina"] + config: JinaProviderConfig diff --git a/src/crewai/rag/embeddings/providers/microsoft/__init__.py b/src/crewai/rag/embeddings/providers/microsoft/__init__.py new file mode 100644 index 000000000..2f8d4b3d6 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/microsoft/__init__.py @@ -0,0 +1,15 @@ +"""Microsoft embedding providers.""" + +from crewai.rag.embeddings.providers.microsoft.azure import ( + AzureProvider, +) +from crewai.rag.embeddings.providers.microsoft.types import ( + AzureProviderConfig, + AzureProviderSpec, +) + +__all__ = [ + "AzureProvider", + "AzureProviderConfig", + "AzureProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/microsoft/azure.py b/src/crewai/rag/embeddings/providers/microsoft/azure.py new file mode 100644 index 000000000..645ec665c --- /dev/null +++ b/src/crewai/rag/embeddings/providers/microsoft/azure.py @@ -0,0 +1,58 @@ +"""Azure OpenAI embeddings provider.""" + +from typing import Any + +from chromadb.utils.embedding_functions.openai_embedding_function import ( + OpenAIEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class AzureProvider(BaseEmbeddingsProvider[OpenAIEmbeddingFunction]): + """Azure OpenAI embeddings provider.""" + + embedding_callable: type[OpenAIEmbeddingFunction] = Field( + default=OpenAIEmbeddingFunction, + description="Azure OpenAI embedding function class", + ) + api_key: str = Field(description="Azure API key", validation_alias="OPENAI_API_KEY") + api_base: str | None = Field( + default=None, + description="Azure endpoint URL", + validation_alias="OPENAI_API_BASE", + ) + api_type: str = Field( + default="azure", + description="API type for Azure", + validation_alias="OPENAI_API_TYPE", + ) + api_version: str | None = Field( + default=None, + description="Azure API version", + validation_alias="OPENAI_API_VERSION", + ) + model_name: str = Field( + default="text-embedding-ada-002", + description="Model name to use for embeddings", + validation_alias="OPENAI_MODEL_NAME", + ) + default_headers: dict[str, Any] | None = Field( + default=None, description="Default headers for API 
requests" + ) + dimensions: int | None = Field( + default=None, + description="Embedding dimensions", + validation_alias="OPENAI_DIMENSIONS", + ) + deployment_id: str | None = Field( + default=None, + description="Azure deployment ID", + validation_alias="OPENAI_DEPLOYMENT_ID", + ) + organization_id: str | None = Field( + default=None, + description="Organization ID", + validation_alias="OPENAI_ORGANIZATION_ID", + ) diff --git a/src/crewai/rag/embeddings/providers/microsoft/types.py b/src/crewai/rag/embeddings/providers/microsoft/types.py new file mode 100644 index 000000000..2d7bf6e8b --- /dev/null +++ b/src/crewai/rag/embeddings/providers/microsoft/types.py @@ -0,0 +1,24 @@ +"""Type definitions for Microsoft Azure embedding providers.""" + +from typing import Annotated, Any, Literal, TypedDict + + +class AzureProviderConfig(TypedDict, total=False): + """Configuration for Azure provider.""" + + api_key: str + api_base: str + api_type: Annotated[str, "azure"] + api_version: str + model_name: Annotated[str, "text-embedding-ada-002"] + default_headers: dict[str, Any] + dimensions: int + deployment_id: str + organization_id: str + + +class AzureProviderSpec(TypedDict): + """Azure provider specification.""" + + provider: Literal["azure"] + config: AzureProviderConfig diff --git a/src/crewai/rag/embeddings/providers/ollama/__init__.py b/src/crewai/rag/embeddings/providers/ollama/__init__.py new file mode 100644 index 000000000..91c82ed44 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/ollama/__init__.py @@ -0,0 +1,15 @@ +"""Ollama embedding providers.""" + +from crewai.rag.embeddings.providers.ollama.ollama_provider import ( + OllamaProvider, +) +from crewai.rag.embeddings.providers.ollama.types import ( + OllamaProviderConfig, + OllamaProviderSpec, +) + +__all__ = [ + "OllamaProvider", + "OllamaProviderConfig", + "OllamaProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/ollama/ollama_provider.py b/src/crewai/rag/embeddings/providers/ollama/ollama_provider.py new file mode 100644 index 000000000..363f76c48 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/ollama/ollama_provider.py @@ -0,0 +1,25 @@ +"""Ollama embeddings provider.""" + +from chromadb.utils.embedding_functions.ollama_embedding_function import ( + OllamaEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class OllamaProvider(BaseEmbeddingsProvider[OllamaEmbeddingFunction]): + """Ollama embeddings provider.""" + + embedding_callable: type[OllamaEmbeddingFunction] = Field( + default=OllamaEmbeddingFunction, description="Ollama embedding function class" + ) + url: str = Field( + default="http://localhost:11434/api/embeddings", + description="Ollama API endpoint URL", + validation_alias="OLLAMA_URL", + ) + model_name: str = Field( + description="Model name to use for embeddings", + validation_alias="OLLAMA_MODEL_NAME", + ) diff --git a/src/crewai/rag/embeddings/providers/ollama/types.py b/src/crewai/rag/embeddings/providers/ollama/types.py new file mode 100644 index 000000000..fdd8953b7 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/ollama/types.py @@ -0,0 +1,17 @@ +"""Type definitions for Ollama embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class OllamaProviderConfig(TypedDict, total=False): + """Configuration for Ollama provider.""" + + url: Annotated[str, "http://localhost:11434/api/embeddings"] + model_name: str + + +class OllamaProviderSpec(TypedDict): + """Ollama provider 
specification.""" + + provider: Literal["ollama"] + config: OllamaProviderConfig diff --git a/src/crewai/rag/embeddings/providers/onnx/__init__.py b/src/crewai/rag/embeddings/providers/onnx/__init__.py new file mode 100644 index 000000000..a18928611 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/onnx/__init__.py @@ -0,0 +1,13 @@ +"""ONNX embedding providers.""" + +from crewai.rag.embeddings.providers.onnx.onnx_provider import ONNXProvider +from crewai.rag.embeddings.providers.onnx.types import ( + ONNXProviderConfig, + ONNXProviderSpec, +) + +__all__ = [ + "ONNXProvider", + "ONNXProviderConfig", + "ONNXProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/onnx/onnx_provider.py b/src/crewai/rag/embeddings/providers/onnx/onnx_provider.py new file mode 100644 index 000000000..71cb9a99b --- /dev/null +++ b/src/crewai/rag/embeddings/providers/onnx/onnx_provider.py @@ -0,0 +1,19 @@ +"""ONNX embeddings provider.""" + +from chromadb.utils.embedding_functions.onnx_mini_lm_l6_v2 import ONNXMiniLM_L6_V2 +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class ONNXProvider(BaseEmbeddingsProvider[ONNXMiniLM_L6_V2]): + """ONNX embeddings provider.""" + + embedding_callable: type[ONNXMiniLM_L6_V2] = Field( + default=ONNXMiniLM_L6_V2, description="ONNX MiniLM embedding function class" + ) + preferred_providers: list[str] | None = Field( + default=None, + description="Preferred ONNX execution providers", + validation_alias="ONNX_PREFERRED_PROVIDERS", + ) diff --git a/src/crewai/rag/embeddings/providers/onnx/types.py b/src/crewai/rag/embeddings/providers/onnx/types.py new file mode 100644 index 000000000..6c3191f58 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/onnx/types.py @@ -0,0 +1,16 @@ +"""Type definitions for ONNX embedding providers.""" + +from typing import Literal, TypedDict + + +class ONNXProviderConfig(TypedDict, total=False): + """Configuration for ONNX provider.""" + + preferred_providers: list[str] + + +class ONNXProviderSpec(TypedDict): + """ONNX provider specification.""" + + provider: Literal["onnx"] + config: ONNXProviderConfig diff --git a/src/crewai/rag/embeddings/providers/openai/__init__.py b/src/crewai/rag/embeddings/providers/openai/__init__.py new file mode 100644 index 000000000..847039352 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/openai/__init__.py @@ -0,0 +1,15 @@ +"""OpenAI embedding providers.""" + +from crewai.rag.embeddings.providers.openai.openai_provider import ( + OpenAIProvider, +) +from crewai.rag.embeddings.providers.openai.types import ( + OpenAIProviderConfig, + OpenAIProviderSpec, +) + +__all__ = [ + "OpenAIProvider", + "OpenAIProviderConfig", + "OpenAIProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/openai/openai_provider.py b/src/crewai/rag/embeddings/providers/openai/openai_provider.py new file mode 100644 index 000000000..dc454a30a --- /dev/null +++ b/src/crewai/rag/embeddings/providers/openai/openai_provider.py @@ -0,0 +1,58 @@ +"""OpenAI embeddings provider.""" + +from typing import Any + +from chromadb.utils.embedding_functions.openai_embedding_function import ( + OpenAIEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class OpenAIProvider(BaseEmbeddingsProvider[OpenAIEmbeddingFunction]): + """OpenAI embeddings provider.""" + + embedding_callable: type[OpenAIEmbeddingFunction] = Field( + default=OpenAIEmbeddingFunction, + description="OpenAI embedding 
function class", + ) + api_key: str = Field( + description="OpenAI API key", validation_alias="OPENAI_API_KEY" + ) + model_name: str = Field( + default="text-embedding-ada-002", + description="Model name to use for embeddings", + validation_alias="OPENAI_MODEL_NAME", + ) + api_base: str | None = Field( + default=None, + description="Base URL for API requests", + validation_alias="OPENAI_API_BASE", + ) + api_type: str | None = Field( + default=None, + description="API type (e.g., 'azure')", + validation_alias="OPENAI_API_TYPE", + ) + api_version: str | None = Field( + default=None, description="API version", validation_alias="OPENAI_API_VERSION" + ) + default_headers: dict[str, Any] | None = Field( + default=None, description="Default headers for API requests" + ) + dimensions: int | None = Field( + default=None, + description="Embedding dimensions", + validation_alias="OPENAI_DIMENSIONS", + ) + deployment_id: str | None = Field( + default=None, + description="Azure deployment ID", + validation_alias="OPENAI_DEPLOYMENT_ID", + ) + organization_id: str | None = Field( + default=None, + description="OpenAI organization ID", + validation_alias="OPENAI_ORGANIZATION_ID", + ) diff --git a/src/crewai/rag/embeddings/providers/openai/types.py b/src/crewai/rag/embeddings/providers/openai/types.py new file mode 100644 index 000000000..4da0ba770 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/openai/types.py @@ -0,0 +1,24 @@ +"""Type definitions for OpenAI embedding providers.""" + +from typing import Annotated, Any, Literal, TypedDict + + +class OpenAIProviderConfig(TypedDict, total=False): + """Configuration for OpenAI provider.""" + + api_key: str + model_name: Annotated[str, "text-embedding-ada-002"] + api_base: str + api_type: str + api_version: str + default_headers: dict[str, Any] + dimensions: int + deployment_id: str + organization_id: str + + +class OpenAIProviderSpec(TypedDict): + """OpenAI provider specification.""" + + provider: Literal["openai"] + config: OpenAIProviderConfig diff --git a/src/crewai/rag/embeddings/providers/openclip/__init__.py b/src/crewai/rag/embeddings/providers/openclip/__init__.py new file mode 100644 index 000000000..0a37506b0 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/openclip/__init__.py @@ -0,0 +1,15 @@ +"""OpenCLIP embedding providers.""" + +from crewai.rag.embeddings.providers.openclip.openclip_provider import ( + OpenCLIPProvider, +) +from crewai.rag.embeddings.providers.openclip.types import ( + OpenCLIPProviderConfig, + OpenCLIPProviderSpec, +) + +__all__ = [ + "OpenCLIPProvider", + "OpenCLIPProviderConfig", + "OpenCLIPProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/openclip/openclip_provider.py b/src/crewai/rag/embeddings/providers/openclip/openclip_provider.py new file mode 100644 index 000000000..aaed1e272 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/openclip/openclip_provider.py @@ -0,0 +1,32 @@ +"""OpenCLIP embeddings provider.""" + +from chromadb.utils.embedding_functions.open_clip_embedding_function import ( + OpenCLIPEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class OpenCLIPProvider(BaseEmbeddingsProvider[OpenCLIPEmbeddingFunction]): + """OpenCLIP embeddings provider.""" + + embedding_callable: type[OpenCLIPEmbeddingFunction] = Field( + default=OpenCLIPEmbeddingFunction, + description="OpenCLIP embedding function class", + ) + model_name: str = Field( + default="ViT-B-32", + description="Model name to use", + 
validation_alias="OPENCLIP_MODEL_NAME", + ) + checkpoint: str = Field( + default="laion2b_s34b_b79k", + description="Model checkpoint", + validation_alias="OPENCLIP_CHECKPOINT", + ) + device: str | None = Field( + default="cpu", + description="Device to run model on", + validation_alias="OPENCLIP_DEVICE", + ) diff --git a/src/crewai/rag/embeddings/providers/openclip/types.py b/src/crewai/rag/embeddings/providers/openclip/types.py new file mode 100644 index 000000000..2bd7b8055 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/openclip/types.py @@ -0,0 +1,18 @@ +"""Type definitions for OpenCLIP embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class OpenCLIPProviderConfig(TypedDict, total=False): + """Configuration for OpenCLIP provider.""" + + model_name: Annotated[str, "ViT-B-32"] + checkpoint: Annotated[str, "laion2b_s34b_b79k"] + device: Annotated[str, "cpu"] + + +class OpenCLIPProviderSpec(TypedDict): + """OpenCLIP provider specification.""" + + provider: Literal["openclip"] + config: OpenCLIPProviderConfig diff --git a/src/crewai/rag/embeddings/providers/roboflow/__init__.py b/src/crewai/rag/embeddings/providers/roboflow/__init__.py new file mode 100644 index 000000000..7821a0160 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/roboflow/__init__.py @@ -0,0 +1,15 @@ +"""Roboflow embedding providers.""" + +from crewai.rag.embeddings.providers.roboflow.roboflow_provider import ( + RoboflowProvider, +) +from crewai.rag.embeddings.providers.roboflow.types import ( + RoboflowProviderConfig, + RoboflowProviderSpec, +) + +__all__ = [ + "RoboflowProvider", + "RoboflowProviderConfig", + "RoboflowProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/roboflow/roboflow_provider.py b/src/crewai/rag/embeddings/providers/roboflow/roboflow_provider.py new file mode 100644 index 000000000..19bbd4b4b --- /dev/null +++ b/src/crewai/rag/embeddings/providers/roboflow/roboflow_provider.py @@ -0,0 +1,25 @@ +"""Roboflow embeddings provider.""" + +from chromadb.utils.embedding_functions.roboflow_embedding_function import ( + RoboflowEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class RoboflowProvider(BaseEmbeddingsProvider[RoboflowEmbeddingFunction]): + """Roboflow embeddings provider.""" + + embedding_callable: type[RoboflowEmbeddingFunction] = Field( + default=RoboflowEmbeddingFunction, + description="Roboflow embedding function class", + ) + api_key: str = Field( + default="", description="Roboflow API key", validation_alias="ROBOFLOW_API_KEY" + ) + api_url: str = Field( + default="https://infer.roboflow.com", + description="Roboflow API URL", + validation_alias="ROBOFLOW_API_URL", + ) diff --git a/src/crewai/rag/embeddings/providers/roboflow/types.py b/src/crewai/rag/embeddings/providers/roboflow/types.py new file mode 100644 index 000000000..fded96517 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/roboflow/types.py @@ -0,0 +1,17 @@ +"""Type definitions for Roboflow embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class RoboflowProviderConfig(TypedDict, total=False): + """Configuration for Roboflow provider.""" + + api_key: Annotated[str, ""] + api_url: Annotated[str, "https://infer.roboflow.com"] + + +class RoboflowProviderSpec(TypedDict): + """Roboflow provider specification.""" + + provider: Literal["roboflow"] + config: RoboflowProviderConfig diff --git 
a/src/crewai/rag/embeddings/providers/sentence_transformer/__init__.py b/src/crewai/rag/embeddings/providers/sentence_transformer/__init__.py new file mode 100644 index 000000000..7aaf2ef33 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/sentence_transformer/__init__.py @@ -0,0 +1,15 @@ +"""SentenceTransformer embedding providers.""" + +from crewai.rag.embeddings.providers.sentence_transformer.sentence_transformer_provider import ( + SentenceTransformerProvider, +) +from crewai.rag.embeddings.providers.sentence_transformer.types import ( + SentenceTransformerProviderConfig, + SentenceTransformerProviderSpec, +) + +__all__ = [ + "SentenceTransformerProvider", + "SentenceTransformerProviderConfig", + "SentenceTransformerProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/sentence_transformer/sentence_transformer_provider.py b/src/crewai/rag/embeddings/providers/sentence_transformer/sentence_transformer_provider.py new file mode 100644 index 000000000..e0ecf9cef --- /dev/null +++ b/src/crewai/rag/embeddings/providers/sentence_transformer/sentence_transformer_provider.py @@ -0,0 +1,34 @@ +"""SentenceTransformer embeddings provider.""" + +from chromadb.utils.embedding_functions.sentence_transformer_embedding_function import ( + SentenceTransformerEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class SentenceTransformerProvider( + BaseEmbeddingsProvider[SentenceTransformerEmbeddingFunction] +): + """SentenceTransformer embeddings provider.""" + + embedding_callable: type[SentenceTransformerEmbeddingFunction] = Field( + default=SentenceTransformerEmbeddingFunction, + description="SentenceTransformer embedding function class", + ) + model_name: str = Field( + default="all-MiniLM-L6-v2", + description="Model name to use", + validation_alias="SENTENCE_TRANSFORMER_MODEL_NAME", + ) + device: str = Field( + default="cpu", + description="Device to run model on (cpu or cuda)", + validation_alias="SENTENCE_TRANSFORMER_DEVICE", + ) + normalize_embeddings: bool = Field( + default=False, + description="Whether to normalize embeddings", + validation_alias="SENTENCE_TRANSFORMER_NORMALIZE_EMBEDDINGS", + ) diff --git a/src/crewai/rag/embeddings/providers/sentence_transformer/types.py b/src/crewai/rag/embeddings/providers/sentence_transformer/types.py new file mode 100644 index 000000000..d186879b6 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/sentence_transformer/types.py @@ -0,0 +1,18 @@ +"""Type definitions for SentenceTransformer embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class SentenceTransformerProviderConfig(TypedDict, total=False): + """Configuration for SentenceTransformer provider.""" + + model_name: Annotated[str, "all-MiniLM-L6-v2"] + device: Annotated[str, "cpu"] + normalize_embeddings: Annotated[bool, False] + + +class SentenceTransformerProviderSpec(TypedDict): + """SentenceTransformer provider specification.""" + + provider: Literal["sentence-transformer"] + config: SentenceTransformerProviderConfig diff --git a/src/crewai/rag/embeddings/providers/text2vec/__init__.py b/src/crewai/rag/embeddings/providers/text2vec/__init__.py new file mode 100644 index 000000000..07f9808c6 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/text2vec/__init__.py @@ -0,0 +1,15 @@ +"""Text2Vec embedding providers.""" + +from crewai.rag.embeddings.providers.text2vec.text2vec_provider import ( + Text2VecProvider, +) +from 
crewai.rag.embeddings.providers.text2vec.types import ( + Text2VecProviderConfig, + Text2VecProviderSpec, +) + +__all__ = [ + "Text2VecProvider", + "Text2VecProviderConfig", + "Text2VecProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/text2vec/text2vec_provider.py b/src/crewai/rag/embeddings/providers/text2vec/text2vec_provider.py new file mode 100644 index 000000000..e66e60da4 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/text2vec/text2vec_provider.py @@ -0,0 +1,22 @@ +"""Text2Vec embeddings provider.""" + +from chromadb.utils.embedding_functions.text2vec_embedding_function import ( + Text2VecEmbeddingFunction, +) +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider + + +class Text2VecProvider(BaseEmbeddingsProvider[Text2VecEmbeddingFunction]): + """Text2Vec embeddings provider.""" + + embedding_callable: type[Text2VecEmbeddingFunction] = Field( + default=Text2VecEmbeddingFunction, + description="Text2Vec embedding function class", + ) + model_name: str = Field( + default="shibing624/text2vec-base-chinese", + description="Model name to use", + validation_alias="TEXT2VEC_MODEL_NAME", + ) diff --git a/src/crewai/rag/embeddings/providers/text2vec/types.py b/src/crewai/rag/embeddings/providers/text2vec/types.py new file mode 100644 index 000000000..f7aa8a171 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/text2vec/types.py @@ -0,0 +1,16 @@ +"""Type definitions for Text2Vec embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class Text2VecProviderConfig(TypedDict, total=False): + """Configuration for Text2Vec provider.""" + + model_name: Annotated[str, "shibing624/text2vec-base-chinese"] + + +class Text2VecProviderSpec(TypedDict): + """Text2Vec provider specification.""" + + provider: Literal["text2vec"] + config: Text2VecProviderConfig diff --git a/src/crewai/rag/embeddings/providers/voyageai/__init__.py b/src/crewai/rag/embeddings/providers/voyageai/__init__.py new file mode 100644 index 000000000..b0735ec13 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/voyageai/__init__.py @@ -0,0 +1,15 @@ +"""VoyageAI embedding providers.""" + +from crewai.rag.embeddings.providers.voyageai.types import ( + VoyageAIProviderConfig, + VoyageAIProviderSpec, +) +from crewai.rag.embeddings.providers.voyageai.voyageai_provider import ( + VoyageAIProvider, +) + +__all__ = [ + "VoyageAIProvider", + "VoyageAIProviderConfig", + "VoyageAIProviderSpec", +] diff --git a/src/crewai/rag/embeddings/providers/voyageai/embedding_callable.py b/src/crewai/rag/embeddings/providers/voyageai/embedding_callable.py new file mode 100644 index 000000000..39490ac0e --- /dev/null +++ b/src/crewai/rag/embeddings/providers/voyageai/embedding_callable.py @@ -0,0 +1,50 @@ +"""VoyageAI embedding function implementation.""" + +from typing import cast + +import voyageai +from typing_extensions import Unpack + +from crewai.rag.core.base_embeddings_callable import EmbeddingFunction +from crewai.rag.core.types import Documents, Embeddings +from crewai.rag.embeddings.providers.voyageai.types import VoyageAIProviderConfig + + +class VoyageAIEmbeddingFunction(EmbeddingFunction[Documents]): + """Embedding function for VoyageAI models.""" + + def __init__(self, **kwargs: Unpack[VoyageAIProviderConfig]) -> None: + """Initialize VoyageAI embedding function. + + Args: + **kwargs: Configuration parameters for VoyageAI. 
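+
+        Example (illustrative sketch; the API key is a placeholder):
+
+            embedder = VoyageAIEmbeddingFunction(api_key="...", model="voyage-2")
+            vectors = embedder(["hello world"])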
+ """ + self._config = kwargs + self._client = voyageai.Client( + api_key=kwargs["api_key"], + max_retries=kwargs.get("max_retries", 0), + timeout=kwargs.get("timeout"), + ) + + def __call__(self, input: Documents) -> Embeddings: + """Generate embeddings for input documents. + + Args: + input: List of documents to embed. + + Returns: + List of embedding vectors. + """ + if isinstance(input, str): + input = [input] + + result = self._client.embed( + texts=input, + model=self._config.get("model", "voyage-2"), + input_type=self._config.get("input_type"), + truncation=self._config.get("truncation", True), + output_dtype=self._config.get("output_dtype"), + output_dimension=self._config.get("output_dimension"), + ) + + return cast(Embeddings, result.embeddings) diff --git a/src/crewai/rag/embeddings/providers/voyageai/types.py b/src/crewai/rag/embeddings/providers/voyageai/types.py new file mode 100644 index 000000000..4d959b8a4 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/voyageai/types.py @@ -0,0 +1,23 @@ +"""Type definitions for VoyageAI embedding providers.""" + +from typing import Annotated, Literal, TypedDict + + +class VoyageAIProviderConfig(TypedDict, total=False): + """Configuration for VoyageAI provider.""" + + api_key: str + model: Annotated[str, "voyage-2"] + input_type: str + truncation: Annotated[bool, True] + output_dtype: str + output_dimension: int + max_retries: Annotated[int, 0] + timeout: float + + +class VoyageAIProviderSpec(TypedDict): + """VoyageAI provider specification.""" + + provider: Literal["voyageai"] + config: VoyageAIProviderConfig diff --git a/src/crewai/rag/embeddings/providers/voyageai/voyageai_provider.py b/src/crewai/rag/embeddings/providers/voyageai/voyageai_provider.py new file mode 100644 index 000000000..1c06e62b8 --- /dev/null +++ b/src/crewai/rag/embeddings/providers/voyageai/voyageai_provider.py @@ -0,0 +1,55 @@ +"""Voyage AI embeddings provider.""" + +from pydantic import Field + +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider +from crewai.rag.embeddings.providers.voyageai.embedding_callable import ( + VoyageAIEmbeddingFunction, +) + + +class VoyageAIProvider(BaseEmbeddingsProvider[VoyageAIEmbeddingFunction]): + """Voyage AI embeddings provider.""" + + embedding_callable: type[VoyageAIEmbeddingFunction] = Field( + default=VoyageAIEmbeddingFunction, + description="Voyage AI embedding function class", + ) + model: str = Field( + default="voyage-2", + description="Model to use for embeddings", + validation_alias="VOYAGEAI_MODEL", + ) + api_key: str = Field( + description="Voyage AI API key", validation_alias="VOYAGEAI_API_KEY" + ) + input_type: str | None = Field( + default=None, + description="Input type for embeddings", + validation_alias="VOYAGEAI_INPUT_TYPE", + ) + truncation: bool = Field( + default=True, + description="Whether to truncate inputs", + validation_alias="VOYAGEAI_TRUNCATION", + ) + output_dtype: str | None = Field( + default=None, + description="Output data type", + validation_alias="VOYAGEAI_OUTPUT_DTYPE", + ) + output_dimension: int | None = Field( + default=None, + description="Output dimension", + validation_alias="VOYAGEAI_OUTPUT_DIMENSION", + ) + max_retries: int = Field( + default=0, + description="Maximum retries for API calls", + validation_alias="VOYAGEAI_MAX_RETRIES", + ) + timeout: float | None = Field( + default=None, + description="Timeout for API calls", + validation_alias="VOYAGEAI_TIMEOUT", + ) diff --git a/src/crewai/rag/embeddings/types.py b/src/crewai/rag/embeddings/types.py 
index 5024d5513..ceafac805 100644 --- a/src/crewai/rag/embeddings/types.py +++ b/src/crewai/rag/embeddings/types.py @@ -2,61 +2,67 @@ from typing import Literal -from pydantic import BaseModel, Field, SecretStr +from crewai.rag.embeddings.providers.aws.types import BedrockProviderSpec +from crewai.rag.embeddings.providers.cohere.types import CohereProviderSpec +from crewai.rag.embeddings.providers.custom.types import CustomProviderSpec +from crewai.rag.embeddings.providers.google.types import ( + GenerativeAiProviderSpec, + VertexAIProviderSpec, +) +from crewai.rag.embeddings.providers.huggingface.types import HuggingFaceProviderSpec +from crewai.rag.embeddings.providers.ibm.types import WatsonProviderSpec +from crewai.rag.embeddings.providers.instructor.types import InstructorProviderSpec +from crewai.rag.embeddings.providers.jina.types import JinaProviderSpec +from crewai.rag.embeddings.providers.microsoft.types import AzureProviderSpec +from crewai.rag.embeddings.providers.ollama.types import OllamaProviderSpec +from crewai.rag.embeddings.providers.onnx.types import ONNXProviderSpec +from crewai.rag.embeddings.providers.openai.types import OpenAIProviderSpec +from crewai.rag.embeddings.providers.openclip.types import OpenCLIPProviderSpec +from crewai.rag.embeddings.providers.roboflow.types import RoboflowProviderSpec +from crewai.rag.embeddings.providers.sentence_transformer.types import ( + SentenceTransformerProviderSpec, +) +from crewai.rag.embeddings.providers.text2vec.types import Text2VecProviderSpec +from crewai.rag.embeddings.providers.voyageai.types import VoyageAIProviderSpec -from crewai.rag.types import EmbeddingFunction +ProviderSpec = ( + AzureProviderSpec + | BedrockProviderSpec + | CohereProviderSpec + | CustomProviderSpec + | GenerativeAiProviderSpec + | HuggingFaceProviderSpec + | InstructorProviderSpec + | JinaProviderSpec + | OllamaProviderSpec + | ONNXProviderSpec + | OpenAIProviderSpec + | OpenCLIPProviderSpec + | RoboflowProviderSpec + | SentenceTransformerProviderSpec + | Text2VecProviderSpec + | VertexAIProviderSpec + | VoyageAIProviderSpec + | WatsonProviderSpec +) -EmbeddingProvider = Literal[ - "openai", +AllowedEmbeddingProviders = Literal[ + "azure", + "amazon-bedrock", "cohere", - "ollama", - "huggingface", - "sentence-transformer", - "instructor", - "google-palm", + "custom", "google-generativeai", "google-vertex", - "amazon-bedrock", + "huggingface", + "instructor", "jina", - "roboflow", - "openclip", - "text2vec", + "ollama", "onnx", + "openai", + "openclip", + "roboflow", + "sentence-transformer", + "text2vec", + "voyageai", + "watson", ] -"""Supported embedding providers. - -These correspond to the embedding functions available in ChromaDB's -embedding_functions module. Each provider has specific requirements -and configuration options. -""" - - -class EmbeddingOptions(BaseModel): - """Configuration options for embedding providers. - - Generic attributes that can be passed to get_embedding_function - to configure various embedding providers. - """ - - provider: EmbeddingProvider = Field( - ..., description="Embedding provider name (e.g., 'openai', 'cohere', 'onnx')" - ) - model_name: str | None = Field( - default=None, description="Model name for the embedding provider" - ) - api_key: SecretStr | None = Field( - default=None, description="API key for the embedding provider" - ) - - -class EmbeddingConfig(BaseModel): - """Configuration wrapper for embedding functions. 
- - Accepts either a pre-configured EmbeddingFunction or EmbeddingOptions - to create one. This provides flexibility in how embeddings are configured. - - Attributes: - function: Either a callable EmbeddingFunction or EmbeddingOptions to create one - """ - - function: EmbeddingFunction | EmbeddingOptions diff --git a/src/crewai/rag/storage/base_rag_storage.py b/src/crewai/rag/storage/base_rag_storage.py index 59189820c..27047f124 100644 --- a/src/crewai/rag/storage/base_rag_storage.py +++ b/src/crewai/rag/storage/base_rag_storage.py @@ -1,8 +1,8 @@ from abc import ABC, abstractmethod from typing import Any -from crewai.rag.embeddings.factory import EmbedderConfig -from crewai.rag.embeddings.types import EmbeddingOptions +from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider +from crewai.rag.embeddings.types import ProviderSpec class BaseRAGStorage(ABC): @@ -16,7 +16,7 @@ class BaseRAGStorage(ABC): self, type: str, allow_reset: bool = True, - embedder_config: EmbeddingOptions | EmbedderConfig | None = None, + embedder_config: ProviderSpec | BaseEmbeddingsProvider | None = None, crew: Any = None, ): self.type = type diff --git a/src/crewai/rag/types.py b/src/crewai/rag/types.py index 58c6da5b2..fa154c0c2 100644 --- a/src/crewai/rag/types.py +++ b/src/crewai/rag/types.py @@ -24,8 +24,7 @@ class BaseRecord(TypedDict, total=False): ) -DenseVector: TypeAlias = list[float] -IntVector: TypeAlias = list[int] +Embeddings: TypeAlias = list[list[float]] EmbeddingFunction: TypeAlias = Callable[..., Any] diff --git a/tests/knowledge/test_knowledge_storage_integration.py b/tests/knowledge/test_knowledge_storage_integration.py index 0f9581864..0c457d5d2 100644 --- a/tests/knowledge/test_knowledge_storage_integration.py +++ b/tests/knowledge/test_knowledge_storage_integration.py @@ -11,7 +11,7 @@ from crewai.knowledge.storage.knowledge_storage import ( # type: ignore[import- @patch("crewai.knowledge.storage.knowledge_storage.get_rag_client") @patch("crewai.knowledge.storage.knowledge_storage.create_client") -@patch("crewai.knowledge.storage.knowledge_storage.get_embedding_function") +@patch("crewai.knowledge.storage.knowledge_storage.build_embedder") def test_knowledge_storage_uses_rag_client( mock_get_embedding: MagicMock, mock_create_client: MagicMock, @@ -122,7 +122,7 @@ def test_search_error_handling(mock_get_client: MagicMock) -> None: @patch("crewai.knowledge.storage.knowledge_storage.get_rag_client") -@patch("crewai.knowledge.storage.knowledge_storage.get_embedding_function") +@patch("crewai.knowledge.storage.knowledge_storage.build_embedder") def test_embedding_configuration_flow( mock_get_embedding: MagicMock, mock_get_client: MagicMock ) -> None: diff --git a/tests/rag/embeddings/test_embedding_factory.py b/tests/rag/embeddings/test_embedding_factory.py index 937e5c1e2..fc12701d9 100644 --- a/tests/rag/embeddings/test_embedding_factory.py +++ b/tests/rag/embeddings/test_embedding_factory.py @@ -1,83 +1,89 @@ -"""Enhanced tests for embedding function factory.""" +"""Tests for embedding function factory.""" from unittest.mock import MagicMock, patch import pytest -from pydantic import SecretStr -from crewai.rag.embeddings.factory import ( # type: ignore[import-untyped] - get_embedding_function, -) -from crewai.rag.embeddings.types import EmbeddingOptions # type: ignore[import-untyped] +from crewai.rag.embeddings.factory import build_embedder -def test_get_embedding_function_default() -> None: - """Test default embedding function when no config provided.""" - with 
patch("crewai.rag.embeddings.factory.OpenAIEmbeddingFunction") as mock_openai: - mock_instance = MagicMock() - mock_openai.return_value = mock_instance +class TestEmbeddingFactory: + """Test embedding factory functions.""" - with patch( - "crewai.rag.embeddings.factory.os.getenv", return_value="test-api-key" - ): - result = get_embedding_function() + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_openai(self, mock_import): + """Test building OpenAI embedder.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() - mock_openai.assert_called_once_with( - api_key="test-api-key", model_name="text-embedding-3-small" - ) - assert result == mock_instance - - -def test_get_embedding_function_with_embedding_options() -> None: - """Test embedding function creation with EmbeddingOptions object.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_openai = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_openai - mock_providers.__contains__.return_value = True - - options = EmbeddingOptions( - provider="openai", - api_key=SecretStr("test-key"), - model_name="text-embedding-3-large", - ) - - result = get_embedding_function(options) - - call_kwargs = mock_openai.call_args.kwargs - assert "api_key" in call_kwargs - assert call_kwargs["api_key"].get_secret_value() == "test-key" - assert "model_name" in call_kwargs - assert call_kwargs["model_name"] == "text-embedding-3-large" - assert result == mock_instance - - -def test_get_embedding_function_sentence_transformer() -> None: - """Test sentence transformer embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_st = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_st - mock_providers.__contains__.return_value = True + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function config = { - "provider": "sentence-transformer", - "config": {"model_name": "all-MiniLM-L6-v2"}, + "provider": "openai", + "config": { + "api_key": "test-key", + "model_name": "text-embedding-3-small", + }, } - result = get_embedding_function(config) + build_embedder(config) - mock_st.assert_called_once_with(model_name="all-MiniLM-L6-v2") - assert result == mock_instance + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.openai.openai_provider.OpenAIProvider" + ) + mock_provider_class.assert_called_once() + call_kwargs = mock_provider_class.call_args.kwargs + assert call_kwargs["api_key"] == "test-key" + assert call_kwargs["model_name"] == "text-embedding-3-small" -def test_get_embedding_function_ollama() -> None: - """Test Ollama embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_ollama = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_ollama - mock_providers.__contains__.return_value = True + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_azure(self, mock_import): + """Test building Azure embedder.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = 
MagicMock() + + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function + + config = { + "provider": "azure", + "config": { + "api_key": "test-azure-key", + "api_base": "https://test.openai.azure.com/", + "api_type": "azure", + "api_version": "2023-05-15", + "model_name": "text-embedding-3-small", + "deployment_id": "test-deployment", + }, + } + + build_embedder(config) + + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.microsoft.azure.AzureProvider" + ) + + call_kwargs = mock_provider_class.call_args.kwargs + assert call_kwargs["api_key"] == "test-azure-key" + assert call_kwargs["api_base"] == "https://test.openai.azure.com/" + assert call_kwargs["api_type"] == "azure" + + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_ollama(self, mock_import): + """Test building Ollama embedder.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() + + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function config = { "provider": "ollama", @@ -87,512 +93,152 @@ def test_get_embedding_function_ollama() -> None: }, } - result = get_embedding_function(config) + build_embedder(config) - mock_ollama.assert_called_once_with( - model_name="nomic-embed-text", url="http://localhost:11434" + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.ollama.ollama_provider.OllamaProvider" ) - assert result == mock_instance + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_cohere(self, mock_import): + """Test building Cohere embedder.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() -def test_get_embedding_function_cohere() -> None: - """Test Cohere embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_cohere = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_cohere - mock_providers.__contains__.return_value = True + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function config = { "provider": "cohere", - "config": {"api_key": "cohere-key", "model_name": "embed-english-v3.0"}, - } - - result = get_embedding_function(config) - - mock_cohere.assert_called_once_with( - api_key="cohere-key", model_name="embed-english-v3.0" - ) - assert result == mock_instance - - -def test_get_embedding_function_huggingface() -> None: - """Test HuggingFace embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_hf = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_hf - mock_providers.__contains__.return_value = True - - config = { - "provider": "huggingface", "config": { - "api_key": "hf-token", - "model_name": "sentence-transformers/all-MiniLM-L6-v2", + "api_key": "cohere-key", + "model_name": "embed-english-v3.0", }, } - result = get_embedding_function(config) + build_embedder(config) - mock_hf.assert_called_once_with( - 
api_key="hf-token", model_name="sentence-transformers/all-MiniLM-L6-v2" + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.cohere.cohere_provider.CohereProvider" ) - assert result == mock_instance + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_voyageai(self, mock_import): + """Test building VoyageAI embedder.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() -def test_get_embedding_function_onnx() -> None: - """Test ONNX embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_onnx = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_onnx - mock_providers.__contains__.return_value = True + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function - config = {"provider": "onnx"} - - result = get_embedding_function(config) - - mock_onnx.assert_called_once() - assert result == mock_instance - - -def test_get_embedding_function_google_palm() -> None: - """Test Google PaLM embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_palm = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_palm - mock_providers.__contains__.return_value = True - - config = {"provider": "google-palm", "config": {"api_key": "palm-key"}} - - result = get_embedding_function(config) - - mock_palm.assert_called_once_with(api_key="palm-key") - assert result == mock_instance - - -def test_get_embedding_function_amazon_bedrock() -> None: - """Test Amazon Bedrock embedding function with explicit session.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_bedrock = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_bedrock - mock_providers.__contains__.return_value = True - - # Provide an explicit session to avoid boto3 import - mock_session = MagicMock() config = { - "provider": "amazon-bedrock", + "provider": "voyageai", "config": { - "session": mock_session, - "region_name": "us-west-2", - "model_name": "amazon.titan-embed-text-v1", + "api_key": "voyage-key", + "model": "voyage-2", }, } - result = get_embedding_function(config) + build_embedder(config) - mock_bedrock.assert_called_once_with( - session=mock_session, - region_name="us-west-2", - model_name="amazon.titan-embed-text-v1", + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.voyageai.voyageai_provider.VoyageAIProvider" ) - assert result == mock_instance + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_watson(self, mock_import): + """Test building Watson embedder.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() -def test_get_embedding_function_jina() -> None: - """Test Jina embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_jina = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_jina - mock_providers.__contains__.return_value = True + mock_import.return_value = 
mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function config = { - "provider": "jina", + "provider": "watson", "config": { - "api_key": "jina-key", - "model_name": "jina-embeddings-v2-base-en", + "model_id": "ibm/slate-125m-english-rtrvr", + "api_key": "watson-key", + "url": "https://us-south.ml.cloud.ibm.com", + "project_id": "test-project", }, } - result = get_embedding_function(config) + build_embedder(config) - mock_jina.assert_called_once_with( - api_key="jina-key", model_name="jina-embeddings-v2-base-en" - ) - assert result == mock_instance - - -def test_get_embedding_function_unsupported_provider() -> None: - """Test handling of unsupported provider.""" - config = {"provider": "unsupported-provider"} - - with pytest.raises(ValueError, match="Unsupported provider: unsupported-provider"): - get_embedding_function(config) - - -def test_get_embedding_function_config_modification() -> None: - """Test that original config dict is not modified.""" - original_config = { - "provider": "openai", - "config": {"api_key": "test-key", "model": "text-embedding-3-small"}, - } - config_copy = original_config.copy() - - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_openai = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_openai - mock_providers.__contains__.return_value = True - - get_embedding_function(config_copy) - - assert config_copy == original_config - - -def test_get_embedding_function_exclude_none_values() -> None: - """Test that None values are excluded from embedding function calls.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_openai = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_openai - mock_providers.__contains__.return_value = True - - options = EmbeddingOptions( - provider="openai", api_key=SecretStr("test-key"), model_name=None + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.ibm.watson.WatsonProvider" ) - result = get_embedding_function(options) + def test_build_embedder_unknown_provider(self): + """Test error handling for unknown provider.""" + config = {"provider": "unknown-provider", "config": {}} - call_kwargs = mock_openai.call_args.kwargs - assert "api_key" in call_kwargs - assert call_kwargs["api_key"].get_secret_value() == "test-key" - assert "model_name" not in call_kwargs - assert result == mock_instance + with pytest.raises(ValueError, match="Unknown provider: unknown-provider"): + build_embedder(config) + def test_build_embedder_missing_provider(self): + """Test error handling for missing provider key.""" + config = {"config": {"api_key": "test-key"}} -def test_get_embedding_function_instructor() -> None: - """Test Instructor embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_instructor = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_instructor - mock_providers.__contains__.return_value = True + with pytest.raises(KeyError): + build_embedder(config) + + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_import_error(self, mock_import): + """Test error handling when provider import fails.""" + mock_import.side_effect = 
ImportError("Module not found") + + config = {"provider": "openai", "config": {"api_key": "test-key"}} + + with pytest.raises(ImportError, match="Failed to import provider openai"): + build_embedder(config) + + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_build_embedder_custom_provider(self, mock_import): + """Test building custom embedder.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_callable = MagicMock() + + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable = mock_embedding_callable config = { - "provider": "instructor", - "config": {"model_name": "hkunlp/instructor-large"}, + "provider": "custom", + "config": {"embedding_callable": mock_embedding_callable}, } - result = get_embedding_function(config) + build_embedder(config) - mock_instructor.assert_called_once_with(model_name="hkunlp/instructor-large") - assert result == mock_instance - - -def test_get_embedding_function_google_generativeai() -> None: - """Test Google Generative AI embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_google = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_google - mock_providers.__contains__.return_value = True - - config = { - "provider": "google-generativeai", - "config": {"api_key": "google-key", "model_name": "models/embedding-001"}, - } - - result = get_embedding_function(config) - - mock_google.assert_called_once_with( - api_key="google-key", model_name="models/embedding-001" + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.custom.custom_provider.CustomProvider" ) - assert result == mock_instance + call_kwargs = mock_provider_class.call_args.kwargs + assert call_kwargs["embedding_callable"] == mock_embedding_callable -def test_get_embedding_function_google_vertex() -> None: - """Test Google Vertex AI embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_vertex = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_vertex - mock_providers.__contains__.return_value = True + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + @patch("crewai.rag.embeddings.factory.build_embedder_from_provider") + def test_build_embedder_with_provider_instance( + self, mock_build_from_provider, mock_import + ): + """Test building embedder from provider instance.""" + from crewai.rag.core.base_embeddings_provider import BaseEmbeddingsProvider - config = { - "provider": "google-vertex", - "config": { - "api_key": "vertex-key", - "project_id": "my-project", - "region": "us-central1", - }, - } + mock_provider = MagicMock(spec=BaseEmbeddingsProvider) + mock_embedding_function = MagicMock() + mock_build_from_provider.return_value = mock_embedding_function - result = get_embedding_function(config) + result = build_embedder(mock_provider) - mock_vertex.assert_called_once_with( - api_key="vertex-key", project_id="my-project", region="us-central1" - ) - assert result == mock_instance - - -def test_get_embedding_function_roboflow() -> None: - """Test Roboflow embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_roboflow = 
MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_roboflow - mock_providers.__contains__.return_value = True - - config = { - "provider": "roboflow", - "config": { - "api_key": "roboflow-key", - "api_url": "https://infer.roboflow.com", - }, - } - - result = get_embedding_function(config) - - mock_roboflow.assert_called_once_with( - api_key="roboflow-key", api_url="https://infer.roboflow.com" - ) - assert result == mock_instance - - -def test_get_embedding_function_openclip() -> None: - """Test OpenCLIP embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_openclip = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_openclip - mock_providers.__contains__.return_value = True - - config = { - "provider": "openclip", - "config": {"model_name": "ViT-B-32", "checkpoint": "laion2b_s34b_b79k"}, - } - - result = get_embedding_function(config) - - mock_openclip.assert_called_once_with( - model_name="ViT-B-32", checkpoint="laion2b_s34b_b79k" - ) - assert result == mock_instance - - -def test_get_embedding_function_text2vec() -> None: - """Test Text2Vec embedding function.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_text2vec = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_text2vec - mock_providers.__contains__.return_value = True - - config = { - "provider": "text2vec", - "config": {"model_name": "shibing624/text2vec-base-chinese"}, - } - - result = get_embedding_function(config) - - mock_text2vec.assert_called_once_with( - model_name="shibing624/text2vec-base-chinese" - ) - assert result == mock_instance - - -def test_model_to_model_name_conversion() -> None: - """Test that 'model' field is converted to 'model_name' for nested config.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_openai = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_openai - mock_providers.__contains__.return_value = True - - config = { - "provider": "openai", - "config": {"api_key": "test-key", "model": "text-embedding-3-small"}, - } - - result = get_embedding_function(config) - - mock_openai.assert_called_once_with( - api_key="test-key", model_name="text-embedding-3-small" - ) - assert result == mock_instance - - -def test_api_key_injection_from_env_openai() -> None: - """Test that OpenAI API key is injected from environment when not provided.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_openai = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_openai - mock_providers.__contains__.return_value = True - - with patch("crewai.rag.embeddings.factory.os.getenv") as mock_getenv: - mock_getenv.return_value = "env-openai-key" - - config = { - "provider": "openai", - "config": {"model": "text-embedding-3-small"}, - } - - result = get_embedding_function(config) - - mock_getenv.assert_called_with("OPENAI_API_KEY") - mock_openai.assert_called_once_with( - api_key="env-openai-key", model_name="text-embedding-3-small" - ) - assert result == mock_instance - - -def test_api_key_injection_from_env_cohere() -> None: - """Test that Cohere API key is injected from environment when not provided.""" - with 
patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_cohere = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_cohere - mock_providers.__contains__.return_value = True - - with patch("crewai.rag.embeddings.factory.os.getenv") as mock_getenv: - mock_getenv.return_value = "env-cohere-key" - - config = { - "provider": "cohere", - "config": {"model_name": "embed-english-v3.0"}, - } - - result = get_embedding_function(config) - - mock_getenv.assert_called_with("COHERE_API_KEY") - mock_cohere.assert_called_once_with( - api_key="env-cohere-key", model_name="embed-english-v3.0" - ) - assert result == mock_instance - - -def test_api_key_not_injected_when_provided() -> None: - """Test that API key from config takes precedence over environment.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_openai = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_openai - mock_providers.__contains__.return_value = True - - with patch("crewai.rag.embeddings.factory.os.getenv") as mock_getenv: - mock_getenv.return_value = "env-key" - - config = { - "provider": "openai", - "config": {"api_key": "config-key", "model": "text-embedding-3-small"}, - } - - result = get_embedding_function(config) - - mock_openai.assert_called_once_with( - api_key="config-key", model_name="text-embedding-3-small" - ) - assert result == mock_instance - - -def test_amazon_bedrock_session_injection() -> None: - """Test that boto3 session is automatically created for amazon-bedrock.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_bedrock = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_bedrock - mock_providers.__contains__.return_value = True - - mock_boto3 = MagicMock() - with patch.dict("sys.modules", {"boto3": mock_boto3}): - mock_session = MagicMock() - mock_boto3.Session.return_value = mock_session - - config = { - "provider": "amazon-bedrock", - "config": {"model_name": "amazon.titan-embed-text-v1"}, - } - - result = get_embedding_function(config) - - mock_boto3.Session.assert_called_once() - mock_bedrock.assert_called_once_with( - session=mock_session, model_name="amazon.titan-embed-text-v1" - ) - assert result == mock_instance - - -def test_amazon_bedrock_session_not_injected_when_provided() -> None: - """Test that provided session is used for amazon-bedrock.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_instance = MagicMock() - mock_bedrock = MagicMock(return_value=mock_instance) - mock_providers.__getitem__.return_value = mock_bedrock - mock_providers.__contains__.return_value = True - - existing_session = MagicMock() - config = { - "provider": "amazon-bedrock", - "config": { - "session": existing_session, - "model_name": "amazon.titan-embed-text-v1", - }, - } - - result = get_embedding_function(config) - - mock_bedrock.assert_called_once_with( - session=existing_session, model_name="amazon.titan-embed-text-v1" - ) - assert result == mock_instance - - -def test_amazon_bedrock_boto3_import_error() -> None: - """Test error handling when boto3 is not installed.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_providers.__contains__.return_value = True - - with patch.dict("sys.modules", {"boto3": None}): - config = { - 
"provider": "amazon-bedrock", - "config": {"model_name": "amazon.titan-embed-text-v1"}, - } - - with pytest.raises( - ImportError, match="boto3 is required for amazon-bedrock" - ): - get_embedding_function(config) - - -def test_amazon_bedrock_session_creation_error() -> None: - """Test error handling when AWS session creation fails.""" - with patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") as mock_providers: - mock_providers.__contains__.return_value = True - - mock_boto3 = MagicMock() - with patch.dict("sys.modules", {"boto3": mock_boto3}): - mock_boto3.Session.side_effect = Exception("AWS credentials not configured") - - config = { - "provider": "amazon-bedrock", - "config": {"model_name": "amazon.titan-embed-text-v1"}, - } - - with pytest.raises(ValueError, match="Failed to create AWS session"): - get_embedding_function(config) - - -def test_invalid_config_format() -> None: - """Test error handling for invalid config format.""" - config = { - "provider": "openai", - "api_key": "test-key", - "model": "text-embedding-3-small", - } - - with pytest.raises(ValueError, match="Invalid embedder configuration format"): - get_embedding_function(config) + mock_build_from_provider.assert_called_once_with(mock_provider) + assert result == mock_embedding_function + mock_import.assert_not_called() diff --git a/tests/rag/embeddings/test_factory_azure.py b/tests/rag/embeddings/test_factory_azure.py index e17d2bbef..97075275a 100644 --- a/tests/rag/embeddings/test_factory_azure.py +++ b/tests/rag/embeddings/test_factory_azure.py @@ -4,76 +4,119 @@ from unittest.mock import MagicMock, patch import pytest -from crewai.rag.embeddings.factory import EmbedderConfig, get_embedding_function +from crewai.rag.embeddings.factory import build_embedder class TestAzureEmbedderFactory: """Test Azure embedder configuration with factory function.""" - @patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") - def test_azure_with_nested_config(self, mock_providers): + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_azure_with_nested_config(self, mock_import): """Test Azure configuration with nested config key.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() - mock_embedding = MagicMock() - mock_openai_func = MagicMock(return_value=mock_embedding) - mock_providers.__getitem__.return_value = mock_openai_func - mock_providers.__contains__.return_value = True + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function - embedder_config = EmbedderConfig( - provider="openai", - config={ + embedder_config = { + "provider": "azure", + "config": { "api_key": "test-azure-key", "api_base": "https://test.openai.azure.com/", "api_type": "azure", "api_version": "2023-05-15", - "model": "text-embedding-3-small", + "model_name": "text-embedding-3-small", "deployment_id": "test-deployment", }, - ) - - result = get_embedding_function(embedder_config) - - mock_openai_func.assert_called_once_with( - api_key="test-azure-key", - api_base="https://test.openai.azure.com/", - api_type="azure", - api_version="2023-05-15", - model_name="text-embedding-3-small", - deployment_id="test-deployment", - ) - assert result == mock_embedding - - @patch("crewai.rag.embeddings.factory.EMBEDDING_PROVIDERS") - def test_regular_openai_with_nested_config(self, mock_providers): - """Test regular OpenAI configuration 
with nested config.""" - - mock_embedding = MagicMock() - mock_openai_func = MagicMock(return_value=mock_embedding) - mock_providers.__getitem__.return_value = mock_openai_func - mock_providers.__contains__.return_value = True - - embedder_config = EmbedderConfig( - provider="openai", - config={"api_key": "test-openai-key", "model": "text-embedding-3-large"}, - ) - - result = get_embedding_function(embedder_config) - - mock_openai_func.assert_called_once_with( - api_key="test-openai-key", model_name="text-embedding-3-large" - ) - assert result == mock_embedding - - def test_flat_format_raises_error(self): - """Test that flat format raises an error.""" - embedder_config = { - "provider": "openai", - "api_key": "test-key", - "model_name": "text-embedding-3-small", } - with pytest.raises(ValueError) as exc_info: - get_embedding_function(embedder_config) + result = build_embedder(embedder_config) - assert "Invalid embedder configuration format" in str(exc_info.value) - assert "nested under a 'config' key" in str(exc_info.value) + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.microsoft.azure.AzureProvider" + ) + + call_kwargs = mock_provider_class.call_args.kwargs + assert call_kwargs["api_key"] == "test-azure-key" + assert call_kwargs["api_base"] == "https://test.openai.azure.com/" + assert call_kwargs["api_type"] == "azure" + assert call_kwargs["api_version"] == "2023-05-15" + assert call_kwargs["model_name"] == "text-embedding-3-small" + assert call_kwargs["deployment_id"] == "test-deployment" + + assert result == mock_embedding_function + + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_regular_openai_with_nested_config(self, mock_import): + """Test regular OpenAI configuration with nested config.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() + + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function + + embedder_config = { + "provider": "openai", + "config": {"api_key": "test-openai-key", "model": "text-embedding-3-large"}, + } + + result = build_embedder(embedder_config) + + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.openai.openai_provider.OpenAIProvider" + ) + + call_kwargs = mock_provider_class.call_args.kwargs + assert call_kwargs["api_key"] == "test-openai-key" + assert call_kwargs["model"] == "text-embedding-3-large" + + assert result == mock_embedding_function + + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_azure_provider_with_minimal_config(self, mock_import): + """Test Azure provider with minimal required configuration.""" + mock_provider_class = MagicMock() + mock_provider_instance = MagicMock() + mock_embedding_function = MagicMock() + + mock_import.return_value = mock_provider_class + mock_provider_class.return_value = mock_provider_instance + mock_provider_instance.embedding_callable.return_value = mock_embedding_function + + embedder_config = { + "provider": "azure", + "config": { + "api_key": "test-key", + "api_base": "https://test.openai.azure.com/", + }, + } + + build_embedder(embedder_config) + + mock_import.assert_called_once_with( + "crewai.rag.embeddings.providers.microsoft.azure.AzureProvider" + ) + + call_kwargs = mock_provider_class.call_args.kwargs + assert call_kwargs["api_key"] == "test-key" + assert call_kwargs["api_base"] == 
"https://test.openai.azure.com/" + + @patch("crewai.rag.embeddings.factory.import_and_validate_definition") + def test_azure_import_error(self, mock_import): + """Test handling of import errors for Azure provider.""" + mock_import.side_effect = ImportError("Failed to import Azure provider") + + embedder_config = { + "provider": "azure", + "config": {"api_key": "test-key"}, + } + + with pytest.raises(ImportError) as exc_info: + build_embedder(embedder_config) + + assert "Failed to import provider azure" in str(exc_info.value) diff --git a/tests/rag/test_error_handling.py b/tests/rag/test_error_handling.py index ef2c8f7d5..0cf033c52 100644 --- a/tests/rag/test_error_handling.py +++ b/tests/rag/test_error_handling.py @@ -55,7 +55,7 @@ def test_knowledge_storage_invalid_embedding_config(mock_get_client: MagicMock) mock_get_client.return_value = MagicMock() with patch( - "crewai.knowledge.storage.knowledge_storage.get_embedding_function" + "crewai.knowledge.storage.knowledge_storage.build_embedder" ) as mock_get_embedding: mock_get_embedding.side_effect = ValueError( "Unsupported provider: invalid_provider" diff --git a/tests/utilities/test_azure_embedder_config.py b/tests/utilities/test_azure_embedder_config.py deleted file mode 100644 index 873c68958..000000000 --- a/tests/utilities/test_azure_embedder_config.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Test Azure embedder configuration with nested format only.""" - -from unittest.mock import MagicMock, patch - -from crewai.rag.embeddings.configurator import EmbeddingConfigurator - - -class TestAzureEmbedderConfiguration: - """Test Azure embedder configuration with nested format.""" - - @patch( - "chromadb.utils.embedding_functions.openai_embedding_function.OpenAIEmbeddingFunction" - ) - def test_azure_openai_with_nested_config(self, mock_openai_func): - """Test Azure configuration using OpenAI provider with nested config key.""" - mock_embedding = MagicMock() - mock_openai_func.return_value = mock_embedding - - configurator = EmbeddingConfigurator() - - embedder_config = { - "provider": "openai", - "config": { - "api_key": "test-azure-key", - "api_base": "https://test.openai.azure.com/", - "api_type": "azure", - "api_version": "2023-05-15", - "model": "text-embedding-3-small", - "deployment_id": "test-deployment", - }, - } - - result = configurator.configure_embedder(embedder_config) - - mock_openai_func.assert_called_once_with( - api_key="test-azure-key", - model_name="text-embedding-3-small", - api_base="https://test.openai.azure.com/", - api_type="azure", - api_version="2023-05-15", - default_headers=None, - dimensions=None, - deployment_id="test-deployment", - organization_id=None, - ) - assert result == mock_embedding - - @patch( - "chromadb.utils.embedding_functions.openai_embedding_function.OpenAIEmbeddingFunction" - ) - def test_azure_provider_with_nested_config(self, mock_openai_func): - """Test using 'azure' as provider with nested config.""" - mock_embedding = MagicMock() - mock_openai_func.return_value = mock_embedding - - configurator = EmbeddingConfigurator() - - embedder_config = { - "provider": "azure", - "config": { - "api_key": "test-azure-key", - "api_base": "https://test.openai.azure.com/", - "api_version": "2023-05-15", - "model": "text-embedding-3-small", - "deployment_id": "test-deployment", - }, - } - - result = configurator.configure_embedder(embedder_config) - - mock_openai_func.assert_called_once_with( - api_key="test-azure-key", - api_base="https://test.openai.azure.com/", - api_type="azure", - api_version="2023-05-15", 
- model_name="text-embedding-3-small", - default_headers=None, - dimensions=None, - deployment_id="test-deployment", - organization_id=None, - ) - assert result == mock_embedding diff --git a/tests/utilities/test_embedding_configuration.py b/tests/utilities/test_embedding_configuration.py deleted file mode 100644 index 2de8cd301..000000000 --- a/tests/utilities/test_embedding_configuration.py +++ /dev/null @@ -1,25 +0,0 @@ -from unittest.mock import patch - -import pytest - -from crewai.rag.embeddings.configurator import EmbeddingConfigurator - - -def test_configure_embedder_importerror(): - configurator = EmbeddingConfigurator() - - embedder_config = { - 'provider': 'openai', - 'config': { - 'model': 'text-embedding-ada-002', - } - } - - with patch('chromadb.utils.embedding_functions.openai_embedding_function.OpenAIEmbeddingFunction') as mock_openai: - mock_openai.side_effect = ImportError("Module not found.") - - with pytest.raises(ImportError) as exc_info: - configurator.configure_embedder(embedder_config) - - assert str(exc_info.value) == "Module not found." - mock_openai.assert_called_once() diff --git a/uv.lock b/uv.lock index 325bb07cd..31a9cbda6 100644 --- a/uv.lock +++ b/uv.lock @@ -142,6 +142,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1b/8e/78ee35774201f38d5e1ba079c9958f7629b1fd079459aea9467441dbfbf5/aiohttp-3.12.15-cp313-cp313-win_amd64.whl", hash = "sha256:1a649001580bdb37c6fdb1bebbd7e3bc688e8ec2b5c6f52edbb664662b17dc84", size = 449067, upload-time = "2025-07-29T05:51:52.549Z" }, ] +[[package]] +name = "aiolimiter" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/23/b52debf471f7a1e42e362d959a3982bdcb4fe13a5d46e63d28868807a79c/aiolimiter-1.2.1.tar.gz", hash = "sha256:e02a37ea1a855d9e832252a105420ad4d15011505512a1a1d814647451b5cca9", size = 7185, upload-time = "2024-12-08T15:31:51.496Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/ba/df6e8e1045aebc4778d19b8a3a9bc1808adb1619ba94ca354d9ba17d86c3/aiolimiter-1.2.1-py3-none-any.whl", hash = "sha256:d3f249e9059a20badcb56b61601a83556133655c11d1eb3dd3e04ff069e5f3c7", size = 6711, upload-time = "2024-12-08T15:31:49.874Z" }, +] + [[package]] name = "aiosignal" version = "1.4.0" @@ -371,6 +380,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, ] +[[package]] +name = "boto3" +version = "1.40.39" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fe/5b/2b79e27e19b5dc0360e07cb40c6364dd8f7104fe7b4016ae65a527a2535d/boto3-1.40.39.tar.gz", hash = "sha256:27ca06d4d6f838b056b4935c9eceb92c8d125dbe0e895c5583bcf7130627dcd2", size = 111587, upload-time = "2025-09-25T19:20:02.534Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7e/72b4f38c85ea879b27f90ad0d51f26b26e320bbc86b75664c0cf409d3d84/boto3-1.40.39-py3-none-any.whl", hash = "sha256:e2cab5606269fe9f428981892aa592b7e0c087a038774475fa4cd6c8b5fe0a99", size = 139345, upload-time = "2025-09-25T19:20:00.381Z" }, +] + +[[package]] +name = "botocore" +version = "1.40.39" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/30/44883126961d895ff8b69b8f7d1b2c60e9a348e38d4354ee597b69b8b5f8/botocore-1.40.39.tar.gz", hash = "sha256:c6efc55cac341811ba90c693d20097db6e2ce903451d94496bccd3f672b1709d", size = 14356776, upload-time = "2025-09-25T19:19:49.842Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/57/2400d0cf030650b02a25a2aeb87729e51cb2aa8d97a2b4d9fec05c671f0b/botocore-1.40.39-py3-none-any.whl", hash = "sha256:144e0e887a9fc198c6772f660fc006028bd1a9ce5eea3caddd848db3e421bc79", size = 14025786, upload-time = "2025-09-25T19:19:46.177Z" }, +] + [[package]] name = "browserbase" version = "1.4.0" @@ -680,6 +717,9 @@ dependencies = [ aisuite = [ { name = "aisuite" }, ] +aws = [ + { name = "boto3" }, +] docling = [ { name = "docling" }, ] @@ -704,6 +744,12 @@ qdrant = [ tools = [ { name = "crewai-tools" }, ] +voyageai = [ + { name = "voyageai" }, +] +watson = [ + { name = "ibm-watsonx-ai" }, +] [package.dev-dependencies] dev = [ @@ -730,10 +776,12 @@ requires-dist = [ { name = "aisuite", marker = "extra == 'aisuite'", specifier = ">=0.1.10" }, { name = "appdirs", specifier = ">=1.4.4" }, { name = "blinker", specifier = ">=1.9.0" }, + { name = "boto3", marker = "extra == 'aws'", specifier = ">=1.40.38" }, { name = "chromadb", specifier = ">=0.5.23" }, { name = "click", specifier = ">=8.1.7" }, { name = "crewai-tools", marker = "extra == 'tools'", specifier = "~=0.73.0" }, { name = "docling", marker = "extra == 'docling'", specifier = ">=2.12.0" }, + { name = "ibm-watsonx-ai", marker = "extra == 'watson'", specifier = ">=1.3.39" }, { name = "instructor", specifier = ">=1.3.3" }, { name = "json-repair", specifier = "==0.25.2" }, { name = "json5", specifier = ">=0.10.0" }, @@ -763,8 +811,9 @@ requires-dist = [ { name = "tomli", specifier = ">=2.0.2" }, { name = "tomli-w", specifier = ">=1.1.0" }, { name = "uv", specifier = ">=0.4.25" }, + { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.3.5" }, ] -provides-extras = ["aisuite", "docling", "embeddings", "mem0", "openpyxl", "pandas", "pdfplumber", "qdrant", "tools"] +provides-extras = ["aisuite", "aws", "docling", "embeddings", "mem0", "openpyxl", "pandas", "pdfplumber", "qdrant", "tools", "voyageai", "watson"] [package.metadata.requires-dev] dev = [ @@ -1608,6 +1657,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, ] +[[package]] +name = "ibm-cos-sdk" +version = "2.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ibm-cos-sdk-core" }, + { name = "ibm-cos-sdk-s3transfer" }, + { name = "jmespath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/b8/b99f17ece72d4bccd7e75539b9a294d0f73ace5c6c475d8f2631afd6f65b/ibm_cos_sdk-2.14.3.tar.gz", hash = "sha256:643b6f2aa1683adad7f432df23407d11ae5adb9d9ad01214115bee77dc64364a", size = 58831, upload-time = "2025-08-01T06:35:51.722Z" } + +[[package]] +name = "ibm-cos-sdk-core" +version = "2.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/7e/45/80c23aa1e13175a9deefe43cbf8e853a3d3bfc8dfa8b6d6fe83e5785fe21/ibm_cos_sdk_core-2.14.3.tar.gz", hash = "sha256:85dee7790c92e8db69bf39dae4c02cac211e3c1d81bb86e64fa2d1e929674623", size = 1103637, upload-time = "2025-08-01T06:35:41.645Z" } + +[[package]] +name = "ibm-cos-sdk-s3transfer" +version = "2.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ibm-cos-sdk-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/ff/c9baf0997266d398ae08347951a2970e5e96ed6232ed0252f649f2b9a7eb/ibm_cos_sdk_s3transfer-2.14.3.tar.gz", hash = "sha256:2251ebfc4a46144401e431f4a5d9f04c262a0d6f95c88a8e71071da056e55f72", size = 139594, upload-time = "2025-08-01T06:35:46.403Z" } + +[[package]] +name = "ibm-watsonx-ai" +version = "1.3.39" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "certifi" }, + { name = "httpx" }, + { name = "ibm-cos-sdk" }, + { name = "lomond" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "requests" }, + { name = "tabulate" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/a1/ce3aee11d3fabee21960cf2ee0b67698079ce12970f02f90fffbe6e3796c/ibm_watsonx_ai-1.3.39.tar.gz", hash = "sha256:357a7d823948655035e4de6265519bf6e377a497f22ec2d26270a9327b71eb5a", size = 788146, upload-time = "2025-09-24T11:59:48.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/fd/dd70433f5487d75de82a3658768f7fe31323779217dba05e9278f12b85cd/ibm_watsonx_ai-1.3.39-py3-none-any.whl", hash = "sha256:4f6b08efdd1c40f554a3d9e96cb798e8f86e8e03897765672d3b1850bfa20e00", size = 1203329, upload-time = "2025-09-24T11:59:46.956Z" }, +] + [[package]] name = "identify" version = "2.6.14" @@ -1860,6 +1962,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/16/f5a0135ccd968b480daad0e6ab34b0c7c5ba3bc447e5088152696140dcb3/jiter-0.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca", size = 207278, upload-time = "2025-05-18T19:04:23.627Z" }, ] +[[package]] +name = "jmespath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, +] + [[package]] name = "json-repair" version = "0.25.2" @@ -1890,6 +2001,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/32/290ca20eb3a2b97ffa6ba1791fcafacb3cd2f41f539c96eb54cfc3cfcf47/jsonlines-3.1.0-py3-none-any.whl", hash = "sha256:632f5e38f93dfcb1ac8c4e09780b92af3a55f38f26e7c47ae85109d420b6ad39", size = 8592, upload-time = "2022-07-01T16:38:02.082Z" }, ] +[[package]] +name = "jsonpatch" +version = "1.33" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonpointer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/78/18813351fe5d63acad16aec57f94ec2b70a09e53ca98145589e185423873/jsonpatch-1.33.tar.gz", hash = 
"sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c", size = 21699, upload-time = "2023-06-26T12:07:29.144Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade", size = 12898, upload-time = "2023-06-16T21:01:28.466Z" }, +] + [[package]] name = "jsonpickle" version = "4.1.1" @@ -1899,6 +2022,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/73/04df8a6fa66d43a9fd45c30f283cc4afff17da671886e451d52af60bdc7e/jsonpickle-4.1.1-py3-none-any.whl", hash = "sha256:bb141da6057898aa2438ff268362b126826c812a1721e31cf08a6e142910dc91", size = 47125, upload-time = "2025-06-02T20:36:08.647Z" }, ] +[[package]] +name = "jsonpointer" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/0a/eebeb1fa92507ea94016a2a790b93c2ae41a7e18778f85471dc54475ed25/jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef", size = 9114, upload-time = "2024-06-10T19:24:42.462Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" }, +] + [[package]] name = "jsonref" version = "1.1.0" @@ -2012,6 +2144,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/fb/dce4757f257cb4e11e13b71ce502dc5d1caf51f1e5cccfdae85bf23960a0/lancedb-0.25.1-cp39-abi3-win_amd64.whl", hash = "sha256:2c6effc10c8263ea84261f49d5ff1957c18814ed7e3eaa5094d71b1aa0573871", size = 38390878, upload-time = "2025-09-23T22:55:24.687Z" }, ] +[[package]] +name = "langchain-core" +version = "0.3.76" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonpatch" }, + { name = "langsmith" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "tenacity" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/4d/5e2ea7754ee0a1f524c412801c6ba9ad49318ecb58b0d524903c3d9efe0a/langchain_core-0.3.76.tar.gz", hash = "sha256:71136a122dd1abae2c289c5809d035cf12b5f2bb682d8a4c1078cd94feae7419", size = 573568, upload-time = "2025-09-10T14:49:39.863Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/b5/501c0ffcb09c734457ceaa86bc7b1dd37b6a261147bd653add03b838aacb/langchain_core-0.3.76-py3-none-any.whl", hash = "sha256:46e0eb48c7ac532432d51f8ca1ece1804c82afe9ae3dcf027b867edadf82b3ec", size = 447508, upload-time = "2025-09-10T14:49:38.179Z" }, +] + +[[package]] +name = "langchain-text-splitters" +version = "0.3.11" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/11/43/dcda8fd25f0b19cb2835f2f6bb67f26ad58634f04ac2d8eae00526b0fa55/langchain_text_splitters-0.3.11.tar.gz", hash = "sha256:7a50a04ada9a133bbabb80731df7f6ddac51bc9f1b9cab7fa09304d71d38a6cc", size = 46458, upload-time = "2025-08-31T23:02:58.316Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/0d/41a51b40d24ff0384ec4f7ab8dd3dcea8353c05c973836b5e289f1465d4f/langchain_text_splitters-0.3.11-py3-none-any.whl", hash = 
"sha256:cf079131166a487f1372c8ab5d0bfaa6c0a4291733d9c43a34a16ac9bcd6a393", size = 33845, upload-time = "2025-08-31T23:02:57.195Z" }, +] + +[[package]] +name = "langsmith" +version = "0.4.31" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "orjson", marker = "platform_python_implementation != 'PyPy'" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "requests-toolbelt" }, + { name = "zstandard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/f5/edbdf89a162ee025348b3b2080fb3b88f4a1040a5a186f32d34aca913994/langsmith-0.4.31.tar.gz", hash = "sha256:5fb3729e22bd9a225391936cb9d1080322e6c375bb776514af06b56d6c46ed3e", size = 959698, upload-time = "2025-09-25T04:18:19.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/8e/e7a43d907a147e1f87eebdd6737483f9feba52a5d4b20f69d0bd6f2fa22f/langsmith-0.4.31-py3-none-any.whl", hash = "sha256:64f340bdead21defe5f4a6ca330c11073e35444989169f669508edf45a19025f", size = 386347, upload-time = "2025-09-25T04:18:16.69Z" }, +] + [[package]] name = "latex2mathml" version = "3.78.1" @@ -2068,6 +2248,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] +[[package]] +name = "lomond" +version = "0.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/9e/ef7813c910d4a893f2bc763ce9246269f55cc68db21dc1327e376d6a2d02/lomond-0.3.3.tar.gz", hash = "sha256:427936596b144b4ec387ead99aac1560b77c8a78107d3d49415d3abbe79acbd3", size = 28789, upload-time = "2018-09-21T15:17:43.297Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/b1/02eebed49c754b01b17de7705caa8c4ceecfb4f926cdafc220c863584360/lomond-0.3.3-py2.py3-none-any.whl", hash = "sha256:df1dd4dd7b802a12b71907ab1abb08b8ce9950195311207579379eb3b1553de7", size = 35512, upload-time = "2018-09-21T15:17:38.686Z" }, +] + [[package]] name = "lxml" version = "5.4.0" @@ -3245,7 +3437,7 @@ wheels = [ [[package]] name = "pandas" -version = "2.3.2" +version = "2.2.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -3254,42 +3446,42 @@ dependencies = [ { name = "pytz" }, { name = "tzdata" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/8e/0e90233ac205ad182bd6b422532695d2b9414944a280488105d598c70023/pandas-2.3.2.tar.gz", hash = "sha256:ab7b58f8f82706890924ccdfb5f48002b83d2b5a3845976a9fb705d36c34dcdb", size = 4488684, upload-time = "2025-08-21T10:28:29.257Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/16/a8eeb70aad84ccbf14076793f90e0031eded63c1899aeae9fdfbf37881f4/pandas-2.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:52bc29a946304c360561974c6542d1dd628ddafa69134a7131fdfd6a5d7a1a35", size = 11539648, upload-time = "2025-08-21T10:26:36.236Z" }, - { url = 
"https://files.pythonhosted.org/packages/47/f1/c5bdaea13bf3708554d93e948b7ea74121ce6e0d59537ca4c4f77731072b/pandas-2.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:220cc5c35ffaa764dd5bb17cf42df283b5cb7fdf49e10a7b053a06c9cb48ee2b", size = 10786923, upload-time = "2025-08-21T10:26:40.518Z" }, - { url = "https://files.pythonhosted.org/packages/bb/10/811fa01476d29ffed692e735825516ad0e56d925961819e6126b4ba32147/pandas-2.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42c05e15111221384019897df20c6fe893b2f697d03c811ee67ec9e0bb5a3424", size = 11726241, upload-time = "2025-08-21T10:26:43.175Z" }, - { url = "https://files.pythonhosted.org/packages/c4/6a/40b043b06e08df1ea1b6d20f0e0c2f2c4ec8c4f07d1c92948273d943a50b/pandas-2.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc03acc273c5515ab69f898df99d9d4f12c4d70dbfc24c3acc6203751d0804cf", size = 12349533, upload-time = "2025-08-21T10:26:46.611Z" }, - { url = "https://files.pythonhosted.org/packages/e2/ea/2e081a2302e41a9bca7056659fdd2b85ef94923723e41665b42d65afd347/pandas-2.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d25c20a03e8870f6339bcf67281b946bd20b86f1a544ebbebb87e66a8d642cba", size = 13202407, upload-time = "2025-08-21T10:26:49.068Z" }, - { url = "https://files.pythonhosted.org/packages/f4/12/7ff9f6a79e2ee8869dcf70741ef998b97ea20050fe25f83dc759764c1e32/pandas-2.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21bb612d148bb5860b7eb2c10faacf1a810799245afd342cf297d7551513fbb6", size = 13837212, upload-time = "2025-08-21T10:26:51.832Z" }, - { url = "https://files.pythonhosted.org/packages/d8/df/5ab92fcd76455a632b3db34a746e1074d432c0cdbbd28d7cd1daba46a75d/pandas-2.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:b62d586eb25cb8cb70a5746a378fc3194cb7f11ea77170d59f889f5dfe3cec7a", size = 11338099, upload-time = "2025-08-21T10:26:54.382Z" }, - { url = "https://files.pythonhosted.org/packages/7a/59/f3e010879f118c2d400902d2d871c2226cef29b08c09fb8dc41111730400/pandas-2.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1333e9c299adcbb68ee89a9bb568fc3f20f9cbb419f1dd5225071e6cddb2a743", size = 11563308, upload-time = "2025-08-21T10:26:56.656Z" }, - { url = "https://files.pythonhosted.org/packages/38/18/48f10f1cc5c397af59571d638d211f494dba481f449c19adbd282aa8f4ca/pandas-2.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:76972bcbd7de8e91ad5f0ca884a9f2c477a2125354af624e022c49e5bd0dfff4", size = 10820319, upload-time = "2025-08-21T10:26:59.162Z" }, - { url = "https://files.pythonhosted.org/packages/95/3b/1e9b69632898b048e223834cd9702052bcf06b15e1ae716eda3196fb972e/pandas-2.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b98bdd7c456a05eef7cd21fd6b29e3ca243591fe531c62be94a2cc987efb5ac2", size = 11790097, upload-time = "2025-08-21T10:27:02.204Z" }, - { url = "https://files.pythonhosted.org/packages/8b/ef/0e2ffb30b1f7fbc9a588bd01e3c14a0d96854d09a887e15e30cc19961227/pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d81573b3f7db40d020983f78721e9bfc425f411e616ef019a10ebf597aedb2e", size = 12397958, upload-time = "2025-08-21T10:27:05.409Z" }, - { url = "https://files.pythonhosted.org/packages/23/82/e6b85f0d92e9afb0e7f705a51d1399b79c7380c19687bfbf3d2837743249/pandas-2.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e190b738675a73b581736cc8ec71ae113d6c3768d0bd18bffa5b9a0927b0b6ea", size = 13225600, upload-time = "2025-08-21T10:27:07.791Z" }, - { url = 
"https://files.pythonhosted.org/packages/e8/f1/f682015893d9ed51611948bd83683670842286a8edd4f68c2c1c3b231eef/pandas-2.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c253828cb08f47488d60f43c5fc95114c771bbfff085da54bfc79cb4f9e3a372", size = 13879433, upload-time = "2025-08-21T10:27:10.347Z" }, - { url = "https://files.pythonhosted.org/packages/a7/e7/ae86261695b6c8a36d6a4c8d5f9b9ede8248510d689a2f379a18354b37d7/pandas-2.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:9467697b8083f9667b212633ad6aa4ab32436dcbaf4cd57325debb0ddef2012f", size = 11336557, upload-time = "2025-08-21T10:27:12.983Z" }, - { url = "https://files.pythonhosted.org/packages/ec/db/614c20fb7a85a14828edd23f1c02db58a30abf3ce76f38806155d160313c/pandas-2.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fbb977f802156e7a3f829e9d1d5398f6192375a3e2d1a9ee0803e35fe70a2b9", size = 11587652, upload-time = "2025-08-21T10:27:15.888Z" }, - { url = "https://files.pythonhosted.org/packages/99/b0/756e52f6582cade5e746f19bad0517ff27ba9c73404607c0306585c201b3/pandas-2.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b9b52693123dd234b7c985c68b709b0b009f4521000d0525f2b95c22f15944b", size = 10717686, upload-time = "2025-08-21T10:27:18.486Z" }, - { url = "https://files.pythonhosted.org/packages/37/4c/dd5ccc1e357abfeee8353123282de17997f90ff67855f86154e5a13b81e5/pandas-2.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bd281310d4f412733f319a5bc552f86d62cddc5f51d2e392c8787335c994175", size = 11278722, upload-time = "2025-08-21T10:27:21.149Z" }, - { url = "https://files.pythonhosted.org/packages/d3/a4/f7edcfa47e0a88cda0be8b068a5bae710bf264f867edfdf7b71584ace362/pandas-2.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96d31a6b4354e3b9b8a2c848af75d31da390657e3ac6f30c05c82068b9ed79b9", size = 11987803, upload-time = "2025-08-21T10:27:23.767Z" }, - { url = "https://files.pythonhosted.org/packages/f6/61/1bce4129f93ab66f1c68b7ed1c12bac6a70b1b56c5dab359c6bbcd480b52/pandas-2.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:df4df0b9d02bb873a106971bb85d448378ef14b86ba96f035f50bbd3688456b4", size = 12766345, upload-time = "2025-08-21T10:27:26.6Z" }, - { url = "https://files.pythonhosted.org/packages/8e/46/80d53de70fee835531da3a1dae827a1e76e77a43ad22a8cd0f8142b61587/pandas-2.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:213a5adf93d020b74327cb2c1b842884dbdd37f895f42dcc2f09d451d949f811", size = 13439314, upload-time = "2025-08-21T10:27:29.213Z" }, - { url = "https://files.pythonhosted.org/packages/28/30/8114832daff7489f179971dbc1d854109b7f4365a546e3ea75b6516cea95/pandas-2.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c13b81a9347eb8c7548f53fd9a4f08d4dfe996836543f805c987bafa03317ae", size = 10983326, upload-time = "2025-08-21T10:27:31.901Z" }, - { url = "https://files.pythonhosted.org/packages/27/64/a2f7bf678af502e16b472527735d168b22b7824e45a4d7e96a4fbb634b59/pandas-2.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0c6ecbac99a354a051ef21c5307601093cb9e0f4b1855984a084bfec9302699e", size = 11531061, upload-time = "2025-08-21T10:27:34.647Z" }, - { url = "https://files.pythonhosted.org/packages/54/4c/c3d21b2b7769ef2f4c2b9299fcadd601efa6729f1357a8dbce8dd949ed70/pandas-2.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c6f048aa0fd080d6a06cc7e7537c09b53be6642d330ac6f54a600c3ace857ee9", size = 10668666, upload-time = "2025-08-21T10:27:37.203Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/e2/f775ba76ecfb3424d7f5862620841cf0edb592e9abd2d2a5387d305fe7a8/pandas-2.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0064187b80a5be6f2f9c9d6bdde29372468751dfa89f4211a3c5871854cfbf7a", size = 11332835, upload-time = "2025-08-21T10:27:40.188Z" }, - { url = "https://files.pythonhosted.org/packages/8f/52/0634adaace9be2d8cac9ef78f05c47f3a675882e068438b9d7ec7ef0c13f/pandas-2.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ac8c320bded4718b298281339c1a50fb00a6ba78cb2a63521c39bec95b0209b", size = 12057211, upload-time = "2025-08-21T10:27:43.117Z" }, - { url = "https://files.pythonhosted.org/packages/0b/9d/2df913f14b2deb9c748975fdb2491da1a78773debb25abbc7cbc67c6b549/pandas-2.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:114c2fe4f4328cf98ce5716d1532f3ab79c5919f95a9cfee81d9140064a2e4d6", size = 12749277, upload-time = "2025-08-21T10:27:45.474Z" }, - { url = "https://files.pythonhosted.org/packages/87/af/da1a2417026bd14d98c236dba88e39837182459d29dcfcea510b2ac9e8a1/pandas-2.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:48fa91c4dfb3b2b9bfdb5c24cd3567575f4e13f9636810462ffed8925352be5a", size = 13415256, upload-time = "2025-08-21T10:27:49.885Z" }, - { url = "https://files.pythonhosted.org/packages/22/3c/f2af1ce8840ef648584a6156489636b5692c162771918aa95707c165ad2b/pandas-2.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:12d039facec710f7ba305786837d0225a3444af7bbd9c15c32ca2d40d157ed8b", size = 10982579, upload-time = "2025-08-21T10:28:08.435Z" }, - { url = "https://files.pythonhosted.org/packages/f3/98/8df69c4097a6719e357dc249bf437b8efbde808038268e584421696cbddf/pandas-2.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:c624b615ce97864eb588779ed4046186f967374185c047070545253a52ab2d57", size = 12028163, upload-time = "2025-08-21T10:27:52.232Z" }, - { url = "https://files.pythonhosted.org/packages/0e/23/f95cbcbea319f349e10ff90db488b905c6883f03cbabd34f6b03cbc3c044/pandas-2.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0cee69d583b9b128823d9514171cabb6861e09409af805b54459bd0c821a35c2", size = 11391860, upload-time = "2025-08-21T10:27:54.673Z" }, - { url = "https://files.pythonhosted.org/packages/ad/1b/6a984e98c4abee22058aa75bfb8eb90dce58cf8d7296f8bc56c14bc330b0/pandas-2.3.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2319656ed81124982900b4c37f0e0c58c015af9a7bbc62342ba5ad07ace82ba9", size = 11309830, upload-time = "2025-08-21T10:27:56.957Z" }, - { url = "https://files.pythonhosted.org/packages/15/d5/f0486090eb18dd8710bf60afeaf638ba6817047c0c8ae5c6a25598665609/pandas-2.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b37205ad6f00d52f16b6d09f406434ba928c1a1966e2771006a9033c736d30d2", size = 11883216, upload-time = "2025-08-21T10:27:59.302Z" }, - { url = "https://files.pythonhosted.org/packages/10/86/692050c119696da19e20245bbd650d8dfca6ceb577da027c3a73c62a047e/pandas-2.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:837248b4fc3a9b83b9c6214699a13f069dc13510a6a6d7f9ba33145d2841a012", size = 12699743, upload-time = "2025-08-21T10:28:02.447Z" }, - { url = "https://files.pythonhosted.org/packages/cd/d7/612123674d7b17cf345aad0a10289b2a384bff404e0463a83c4a3a59d205/pandas-2.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d2c3554bd31b731cd6490d94a28f3abb8dd770634a9e06eb6d2911b9827db370", size = 13186141, upload-time = "2025-08-21T10:28:05.377Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/70/c853aec59839bceed032d52010ff5f1b8d87dc3114b762e4ba2727661a3b/pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5", size = 12580827, upload-time = "2024-09-20T13:08:42.347Z" }, + { url = "https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348", size = 11303897, upload-time = "2024-09-20T13:08:45.807Z" }, + { url = "https://files.pythonhosted.org/packages/ed/12/86c1747ea27989d7a4064f806ce2bae2c6d575b950be087837bdfcabacc9/pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed", size = 66480908, upload-time = "2024-09-20T18:37:13.513Z" }, + { url = "https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57", size = 13064210, upload-time = "2024-09-20T13:08:48.325Z" }, + { url = "https://files.pythonhosted.org/packages/61/61/a89015a6d5536cb0d6c3ba02cebed51a95538cf83472975275e28ebf7d0c/pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42", size = 16754292, upload-time = "2024-09-20T19:01:54.443Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0d/4cc7b69ce37fac07645a94e1d4b0880b15999494372c1523508511b09e40/pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f", size = 14416379, upload-time = "2024-09-20T13:08:50.882Z" }, + { url = "https://files.pythonhosted.org/packages/31/9e/6ebb433de864a6cd45716af52a4d7a8c3c9aaf3a98368e61db9e69e69a9c/pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645", size = 11598471, upload-time = "2024-09-20T13:08:53.332Z" }, + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222, upload-time = "2024-09-20T13:08:56.254Z" }, + { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274, upload-time = "2024-09-20T13:08:58.645Z" }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836, upload-time = "2024-09-20T19:01:57.571Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505, upload-time = "2024-09-20T13:09:01.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420, upload-time = "2024-09-20T19:02:00.678Z" }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457, upload-time = "2024-09-20T13:09:04.105Z" }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166, upload-time = "2024-09-20T13:09:06.917Z" }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643, upload-time = "2024-09-20T13:09:25.522Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573, upload-time = "2024-09-20T13:09:28.012Z" }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085, upload-time = "2024-09-20T19:02:10.451Z" }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809, upload-time = "2024-09-20T13:09:30.814Z" }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316, upload-time = "2024-09-20T19:02:13.825Z" }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055, upload-time = "2024-09-20T13:09:33.462Z" }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175, upload-time = "2024-09-20T13:09:35.871Z" }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650, upload-time = "2024-09-20T13:09:38.685Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177, upload-time = "2024-09-20T13:09:41.141Z" }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526, upload-time = "2024-09-20T19:02:16.905Z" }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013, upload-time = "2024-09-20T13:09:44.39Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620, upload-time = "2024-09-20T19:02:20.639Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" }, ] [[package]] @@ -4530,6 +4722,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, ] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, +] + [[package]] name = "rich" version = "14.1.0" @@ -4703,6 +4907,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/04/afc078a12cf68592345b1e2d6ecdff837d286bac023d7a22c54c7a698c5b/ruff-0.13.1-py3-none-win_arm64.whl", hash = "sha256:c0bae9ffd92d54e03c2bf266f466da0a65e145f298ee5b5846ed435f6a00518a", size = 12437893, upload-time = "2025-09-18T19:52:41.283Z" }, ] +[[package]] +name = "s3transfer" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/74/8d69dcb7a9efe8baa2046891735e5dfe433ad558ae23d9e3c14c633d1d58/s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125", size = 151547, upload-time = "2025-09-09T19:23:31.089Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload-time = "2025-09-09T19:23:30.041Z" }, +] + [[package]] name = "safetensors" version = "0.6.2" @@ -5746,6 +5962,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" }, ] +[[package]] +name = "voyageai" +version = "0.3.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "aiolimiter" }, + { name = "langchain-text-splitters" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pillow" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "tenacity" }, + { name = "tokenizers" }, 
+] +sdist = { url = "https://files.pythonhosted.org/packages/51/9b/e40f90793c1d03610b6109852791f752fcb257989a96701258278f874e00/voyageai-0.3.5.tar.gz", hash = "sha256:963e0d71611af529fa0e496db232a4f660b5f73bce7af1ab288a7f59df7512da", size = 20414, upload-time = "2025-09-11T00:28:26.29Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/9d/709f5c7fc80a7bf11952fbccfca2bc5525bd5d345521795358819bd01d02/voyageai-0.3.5-py3-none-any.whl", hash = "sha256:1f70fcf3532d7e0bbc4332b1831a6fc1f714f268eeddc8b2859b81bf06a82411", size = 28257, upload-time = "2025-09-11T00:28:24.62Z" }, +] + [[package]] name = "watchfiles" version = "1.1.0" @@ -6090,3 +6327,78 @@ sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50e wheels = [ { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, ] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/7a/28efd1d371f1acd037ac64ed1c5e2b41514a6cc937dd6ab6a13ab9f0702f/zstandard-0.25.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e59fdc271772f6686e01e1b3b74537259800f57e24280be3f29c8a0deb1904dd", size = 795256, upload-time = "2025-09-14T22:15:56.415Z" }, + { url = "https://files.pythonhosted.org/packages/96/34/ef34ef77f1ee38fc8e4f9775217a613b452916e633c4f1d98f31db52c4a5/zstandard-0.25.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4d441506e9b372386a5271c64125f72d5df6d2a8e8a2a45a0ae09b03cb781ef7", size = 640565, upload-time = "2025-09-14T22:15:58.177Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1b/4fdb2c12eb58f31f28c4d28e8dc36611dd7205df8452e63f52fb6261d13e/zstandard-0.25.0-cp310-cp310-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:ab85470ab54c2cb96e176f40342d9ed41e58ca5733be6a893b730e7af9c40550", size = 5345306, upload-time = "2025-09-14T22:16:00.165Z" }, + { url = "https://files.pythonhosted.org/packages/73/28/a44bdece01bca027b079f0e00be3b6bd89a4df180071da59a3dd7381665b/zstandard-0.25.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e05ab82ea7753354bb054b92e2f288afb750e6b439ff6ca78af52939ebbc476d", size = 5055561, upload-time = "2025-09-14T22:16:02.22Z" }, + { url = "https://files.pythonhosted.org/packages/e9/74/68341185a4f32b274e0fc3410d5ad0750497e1acc20bd0f5b5f64ce17785/zstandard-0.25.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:78228d8a6a1c177a96b94f7e2e8d012c55f9c760761980da16ae7546a15a8e9b", size = 5402214, upload-time = "2025-09-14T22:16:04.109Z" }, + { url = "https://files.pythonhosted.org/packages/8b/67/f92e64e748fd6aaffe01e2b75a083c0c4fd27abe1c8747fee4555fcee7dd/zstandard-0.25.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:2b6bd67528ee8b5c5f10255735abc21aa106931f0dbaf297c7be0c886353c3d0", size = 5449703, upload-time = "2025-09-14T22:16:06.312Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/e5/6d36f92a197c3c17729a2125e29c169f460538a7d939a27eaaa6dcfcba8e/zstandard-0.25.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4b6d83057e713ff235a12e73916b6d356e3084fd3d14ced499d84240f3eecee0", size = 5556583, upload-time = "2025-09-14T22:16:08.457Z" }, + { url = "https://files.pythonhosted.org/packages/d7/83/41939e60d8d7ebfe2b747be022d0806953799140a702b90ffe214d557638/zstandard-0.25.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9174f4ed06f790a6869b41cba05b43eeb9a35f8993c4422ab853b705e8112bbd", size = 5045332, upload-time = "2025-09-14T22:16:10.444Z" }, + { url = "https://files.pythonhosted.org/packages/b3/87/d3ee185e3d1aa0133399893697ae91f221fda79deb61adbe998a7235c43f/zstandard-0.25.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:25f8f3cd45087d089aef5ba3848cd9efe3ad41163d3400862fb42f81a3a46701", size = 5572283, upload-time = "2025-09-14T22:16:12.128Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1d/58635ae6104df96671076ac7d4ae7816838ce7debd94aecf83e30b7121b0/zstandard-0.25.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3756b3e9da9b83da1796f8809dd57cb024f838b9eeafde28f3cb472012797ac1", size = 4959754, upload-time = "2025-09-14T22:16:14.225Z" }, + { url = "https://files.pythonhosted.org/packages/75/d6/57e9cb0a9983e9a229dd8fd2e6e96593ef2aa82a3907188436f22b111ccd/zstandard-0.25.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:81dad8d145d8fd981b2962b686b2241d3a1ea07733e76a2f15435dfb7fb60150", size = 5266477, upload-time = "2025-09-14T22:16:16.343Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a9/ee891e5edf33a6ebce0a028726f0bbd8567effe20fe3d5808c42323e8542/zstandard-0.25.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:a5a419712cf88862a45a23def0ae063686db3d324cec7edbe40509d1a79a0aab", size = 5440914, upload-time = "2025-09-14T22:16:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/58/08/a8522c28c08031a9521f27abc6f78dbdee7312a7463dd2cfc658b813323b/zstandard-0.25.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e7360eae90809efd19b886e59a09dad07da4ca9ba096752e61a2e03c8aca188e", size = 5819847, upload-time = "2025-09-14T22:16:20.559Z" }, + { url = "https://files.pythonhosted.org/packages/6f/11/4c91411805c3f7b6f31c60e78ce347ca48f6f16d552fc659af6ec3b73202/zstandard-0.25.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:75ffc32a569fb049499e63ce68c743155477610532da1eb38e7f24bf7cd29e74", size = 5363131, upload-time = "2025-09-14T22:16:22.206Z" }, + { url = "https://files.pythonhosted.org/packages/ef/d6/8c4bd38a3b24c4c7676a7a3d8de85d6ee7a983602a734b9f9cdefb04a5d6/zstandard-0.25.0-cp310-cp310-win32.whl", hash = "sha256:106281ae350e494f4ac8a80470e66d1fe27e497052c8d9c3b95dc4cf1ade81aa", size = 436469, upload-time = "2025-09-14T22:16:25.002Z" }, + { url = "https://files.pythonhosted.org/packages/93/90/96d50ad417a8ace5f841b3228e93d1bb13e6ad356737f42e2dde30d8bd68/zstandard-0.25.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea9d54cc3d8064260114a0bbf3479fc4a98b21dffc89b3459edd506b69262f6e", size = 506100, upload-time = "2025-09-14T22:16:23.569Z" }, + { url = "https://files.pythonhosted.org/packages/2a/83/c3ca27c363d104980f1c9cee1101cc8ba724ac8c28a033ede6aab89585b1/zstandard-0.25.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:933b65d7680ea337180733cf9e87293cc5500cc0eb3fc8769f4d3c88d724ec5c", size = 795254, upload-time = "2025-09-14T22:16:26.137Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/4d/e66465c5411a7cf4866aeadc7d108081d8ceba9bc7abe6b14aa21c671ec3/zstandard-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3f79487c687b1fc69f19e487cd949bf3aae653d181dfb5fde3bf6d18894706f", size = 640559, upload-time = "2025-09-14T22:16:27.973Z" }, + { url = "https://files.pythonhosted.org/packages/12/56/354fe655905f290d3b147b33fe946b0f27e791e4b50a5f004c802cb3eb7b/zstandard-0.25.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:0bbc9a0c65ce0eea3c34a691e3c4b6889f5f3909ba4822ab385fab9057099431", size = 5348020, upload-time = "2025-09-14T22:16:29.523Z" }, + { url = "https://files.pythonhosted.org/packages/3b/13/2b7ed68bd85e69a2069bcc72141d378f22cae5a0f3b353a2c8f50ef30c1b/zstandard-0.25.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:01582723b3ccd6939ab7b3a78622c573799d5d8737b534b86d0e06ac18dbde4a", size = 5058126, upload-time = "2025-09-14T22:16:31.811Z" }, + { url = "https://files.pythonhosted.org/packages/c9/dd/fdaf0674f4b10d92cb120ccff58bbb6626bf8368f00ebfd2a41ba4a0dc99/zstandard-0.25.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5f1ad7bf88535edcf30038f6919abe087f606f62c00a87d7e33e7fc57cb69fcc", size = 5405390, upload-time = "2025-09-14T22:16:33.486Z" }, + { url = "https://files.pythonhosted.org/packages/0f/67/354d1555575bc2490435f90d67ca4dd65238ff2f119f30f72d5cde09c2ad/zstandard-0.25.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:06acb75eebeedb77b69048031282737717a63e71e4ae3f77cc0c3b9508320df6", size = 5452914, upload-time = "2025-09-14T22:16:35.277Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/e9cfd801a3f9190bf3e759c422bbfd2247db9d7f3d54a56ecde70137791a/zstandard-0.25.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9300d02ea7c6506f00e627e287e0492a5eb0371ec1670ae852fefffa6164b072", size = 5559635, upload-time = "2025-09-14T22:16:37.141Z" }, + { url = "https://files.pythonhosted.org/packages/21/88/5ba550f797ca953a52d708c8e4f380959e7e3280af029e38fbf47b55916e/zstandard-0.25.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfd06b1c5584b657a2892a6014c2f4c20e0db0208c159148fa78c65f7e0b0277", size = 5048277, upload-time = "2025-09-14T22:16:38.807Z" }, + { url = "https://files.pythonhosted.org/packages/46/c0/ca3e533b4fa03112facbe7fbe7779cb1ebec215688e5df576fe5429172e0/zstandard-0.25.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f373da2c1757bb7f1acaf09369cdc1d51d84131e50d5fa9863982fd626466313", size = 5574377, upload-time = "2025-09-14T22:16:40.523Z" }, + { url = "https://files.pythonhosted.org/packages/12/9b/3fb626390113f272abd0799fd677ea33d5fc3ec185e62e6be534493c4b60/zstandard-0.25.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6c0e5a65158a7946e7a7affa6418878ef97ab66636f13353b8502d7ea03c8097", size = 4961493, upload-time = "2025-09-14T22:16:43.3Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d3/23094a6b6a4b1343b27ae68249daa17ae0651fcfec9ed4de09d14b940285/zstandard-0.25.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c8e167d5adf59476fa3e37bee730890e389410c354771a62e3c076c86f9f7778", size = 5269018, upload-time = "2025-09-14T22:16:45.292Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a7/bb5a0c1c0f3f4b5e9d5b55198e39de91e04ba7c205cc46fcb0f95f0383c1/zstandard-0.25.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:98750a309eb2f020da61e727de7d7ba3c57c97cf6213f6f6277bb7fb42a8e065", size = 
5443672, upload-time = "2025-09-14T22:16:47.076Z" }, + { url = "https://files.pythonhosted.org/packages/27/22/503347aa08d073993f25109c36c8d9f029c7d5949198050962cb568dfa5e/zstandard-0.25.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22a086cff1b6ceca18a8dd6096ec631e430e93a8e70a9ca5efa7561a00f826fa", size = 5822753, upload-time = "2025-09-14T22:16:49.316Z" }, + { url = "https://files.pythonhosted.org/packages/e2/be/94267dc6ee64f0f8ba2b2ae7c7a2df934a816baaa7291db9e1aa77394c3c/zstandard-0.25.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:72d35d7aa0bba323965da807a462b0966c91608ef3a48ba761678cb20ce5d8b7", size = 5366047, upload-time = "2025-09-14T22:16:51.328Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a3/732893eab0a3a7aecff8b99052fecf9f605cf0fb5fb6d0290e36beee47a4/zstandard-0.25.0-cp311-cp311-win32.whl", hash = "sha256:f5aeea11ded7320a84dcdd62a3d95b5186834224a9e55b92ccae35d21a8b63d4", size = 436484, upload-time = "2025-09-14T22:16:55.005Z" }, + { url = "https://files.pythonhosted.org/packages/43/a3/c6155f5c1cce691cb80dfd38627046e50af3ee9ddc5d0b45b9b063bfb8c9/zstandard-0.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:daab68faadb847063d0c56f361a289c4f268706b598afbf9ad113cbe5c38b6b2", size = 506183, upload-time = "2025-09-14T22:16:52.753Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3e/8945ab86a0820cc0e0cdbf38086a92868a9172020fdab8a03ac19662b0e5/zstandard-0.25.0-cp311-cp311-win_arm64.whl", hash = "sha256:22a06c5df3751bb7dc67406f5374734ccee8ed37fc5981bf1ad7041831fa1137", size = 462533, upload-time = "2025-09-14T22:16:53.878Z" }, + { url = "https://files.pythonhosted.org/packages/82/fc/f26eb6ef91ae723a03e16eddb198abcfce2bc5a42e224d44cc8b6765e57e/zstandard-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b3c3a3ab9daa3eed242d6ecceead93aebbb8f5f84318d82cee643e019c4b73b", size = 795738, upload-time = "2025-09-14T22:16:56.237Z" }, + { url = "https://files.pythonhosted.org/packages/aa/1c/d920d64b22f8dd028a8b90e2d756e431a5d86194caa78e3819c7bf53b4b3/zstandard-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00", size = 640436, upload-time = "2025-09-14T22:16:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/53/6c/288c3f0bd9fcfe9ca41e2c2fbfd17b2097f6af57b62a81161941f09afa76/zstandard-0.25.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:011d388c76b11a0c165374ce660ce2c8efa8e5d87f34996aa80f9c0816698b64", size = 5343019, upload-time = "2025-09-14T22:16:59.302Z" }, + { url = "https://files.pythonhosted.org/packages/1e/15/efef5a2f204a64bdb5571e6161d49f7ef0fffdbca953a615efbec045f60f/zstandard-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea", size = 5063012, upload-time = "2025-09-14T22:17:01.156Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/a6ce629ffdb43959e92e87ebdaeebb5ac81c944b6a75c9c47e300f85abdf/zstandard-0.25.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb", size = 5394148, upload-time = "2025-09-14T22:17:03.091Z" }, + { url = "https://files.pythonhosted.org/packages/e3/79/2bf870b3abeb5c070fe2d670a5a8d1057a8270f125ef7676d29ea900f496/zstandard-0.25.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a", size = 
5451652, upload-time = "2025-09-14T22:17:04.979Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/7be26e610767316c028a2cbedb9a3beabdbe33e2182c373f71a1c0b88f36/zstandard-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902", size = 5546993, upload-time = "2025-09-14T22:17:06.781Z" }, + { url = "https://files.pythonhosted.org/packages/85/c7/3483ad9ff0662623f3648479b0380d2de5510abf00990468c286c6b04017/zstandard-0.25.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f", size = 5046806, upload-time = "2025-09-14T22:17:08.415Z" }, + { url = "https://files.pythonhosted.org/packages/08/b3/206883dd25b8d1591a1caa44b54c2aad84badccf2f1de9e2d60a446f9a25/zstandard-0.25.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b", size = 5576659, upload-time = "2025-09-14T22:17:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/9d/31/76c0779101453e6c117b0ff22565865c54f48f8bd807df2b00c2c404b8e0/zstandard-0.25.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6", size = 4953933, upload-time = "2025-09-14T22:17:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/18/e1/97680c664a1bf9a247a280a053d98e251424af51f1b196c6d52f117c9720/zstandard-0.25.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:809c5bcb2c67cd0ed81e9229d227d4ca28f82d0f778fc5fea624a9def3963f91", size = 5268008, upload-time = "2025-09-14T22:17:13.627Z" }, + { url = "https://files.pythonhosted.org/packages/1e/73/316e4010de585ac798e154e88fd81bb16afc5c5cb1a72eeb16dd37e8024a/zstandard-0.25.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708", size = 5433517, upload-time = "2025-09-14T22:17:16.103Z" }, + { url = "https://files.pythonhosted.org/packages/5b/60/dd0f8cfa8129c5a0ce3ea6b7f70be5b33d2618013a161e1ff26c2b39787c/zstandard-0.25.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512", size = 5814292, upload-time = "2025-09-14T22:17:17.827Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5f/75aafd4b9d11b5407b641b8e41a57864097663699f23e9ad4dbb91dc6bfe/zstandard-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa", size = 5360237, upload-time = "2025-09-14T22:17:19.954Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/0309daffea4fcac7981021dbf21cdb2e3427a9e76bafbcdbdf5392ff99a4/zstandard-0.25.0-cp312-cp312-win32.whl", hash = "sha256:23ebc8f17a03133b4426bcc04aabd68f8236eb78c3760f12783385171b0fd8bd", size = 436922, upload-time = "2025-09-14T22:17:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/79/3b/fa54d9015f945330510cb5d0b0501e8253c127cca7ebe8ba46a965df18c5/zstandard-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffef5a74088f1e09947aecf91011136665152e0b4b359c42be3373897fb39b01", size = 506276, upload-time = "2025-09-14T22:17:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6b/8b51697e5319b1f9ac71087b0af9a40d8a6288ff8025c36486e0c12abcc4/zstandard-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:181eb40e0b6a29b3cd2849f825e0fa34397f649170673d385f3598ae17cca2e9", size = 462679, upload-time = "2025-09-14T22:17:23.147Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" }, + { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 
5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, +]