feat: rag configuration with optional dependency support (#3394)
Some checks failed
Notify Downstream / notify-downstream (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled

### RAG Config System

* Added ChromaDB client creation via config with sensible defaults
* Introduced optional imports and shared RAG config utilities/schema
* Enabled embedding function support with ChromaDB provider integration
* Refactored configs for immutability and stronger type safety
* Removed unused code and expanded test coverage
This commit is contained in:
Greyson LaLonde
2025-08-26 00:00:22 -04:00
committed by GitHub
parent 2e4bd3f49d
commit 7ac482c7c9
21 changed files with 459 additions and 33 deletions

View File

@@ -3,6 +3,8 @@
from abc import abstractmethod
from typing import Any, Protocol, runtime_checkable, TypedDict, Annotated
from typing_extensions import Unpack, Required
from pydantic import GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema
from crewai.rag.types import (
@@ -96,6 +98,17 @@ class BaseClient(Protocol):
client: Any
embedding_function: EmbeddingFunction
@classmethod
def __get_pydantic_core_schema__(
cls, _source_type: Any, _handler: GetCoreSchemaHandler
) -> CoreSchema:
"""Generate Pydantic core schema for BaseClient Protocol.
This allows the Protocol to be used in Pydantic models without
requiring arbitrary_types_allowed=True.
"""
return core_schema.any_schema()
@abstractmethod
def create_collection(self, **kwargs: Unpack[BaseCollectionParams]) -> None:
"""Create a new collection/index in the vector database.

View File

@@ -1,30 +0,0 @@
"""Base provider protocol for vector database client creation."""
from abc import ABC
from typing import Any, Protocol, runtime_checkable, Union
from pydantic import BaseModel, Field
from crewai.rag.types import EmbeddingFunction
from crewai.rag.embeddings.types import EmbeddingOptions
class BaseProviderOptions(BaseModel, ABC):
"""Base configuration for all provider options."""
client_type: str = Field(..., description="Type of client to create")
embedding_config: Union[EmbeddingOptions, EmbeddingFunction, None] = Field(
default=None,
description="Embedding configuration - either options for built-in providers or a custom function",
)
options: Any = Field(
default=None, description="Additional provider-specific options"
)
@runtime_checkable
class BaseProvider(Protocol):
"""Protocol for vector database client providers."""
def __call__(self, options: BaseProviderOptions) -> Any:
"""Create and return a configured client instance."""
...