mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 16:48:30 +00:00
fix: handle Pydantic V1/V2 compatibility in ChromaDBConfig
Fixes #4095 When users pass a chromadb.config.Settings object (Pydantic V1 model) to ChromaDBConfig (Pydantic V2 dataclass), Pydantic V2 would attempt to validate it and fail with: TypeError: BaseModel.validate() takes 2 positional arguments but 3 were given This fix: - Uses SkipValidation to prevent Pydantic V2 from validating the V1 Settings object - Uses BeforeValidator to handle dict-to-Settings conversion - Adds arbitrary_types_allowed=True to the config for extra safety The settings field now accepts either: - A chromadb.config.Settings instance (passed through unchanged) - A dictionary of settings parameters (converted to Settings internally) Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -2,10 +2,11 @@
|
||||
|
||||
from dataclasses import field
|
||||
import os
|
||||
from typing import Literal, cast
|
||||
from typing import Annotated, Any, Literal, cast
|
||||
import warnings
|
||||
|
||||
from chromadb.config import Settings
|
||||
from pydantic import BeforeValidator, ConfigDict, SkipValidation
|
||||
from pydantic.dataclasses import dataclass as pyd_dataclass
|
||||
|
||||
from crewai.rag.chromadb.constants import (
|
||||
@@ -31,6 +32,40 @@ warnings.filterwarnings(
|
||||
)
|
||||
|
||||
|
||||
def _coerce_settings(value: Any) -> Settings:
|
||||
"""Coerce input value to a chromadb.config.Settings instance.
|
||||
|
||||
This validator handles the Pydantic V1/V2 compatibility issue by:
|
||||
- Passing through existing Settings objects without validation
|
||||
- Converting dict inputs to Settings objects
|
||||
|
||||
Args:
|
||||
value: Either a Settings instance or a dict of settings parameters.
|
||||
|
||||
Returns:
|
||||
A chromadb.config.Settings instance.
|
||||
|
||||
Raises:
|
||||
TypeError: If value is neither a Settings instance nor a dict.
|
||||
"""
|
||||
if isinstance(value, Settings):
|
||||
return value
|
||||
if isinstance(value, dict):
|
||||
return Settings(**value)
|
||||
raise TypeError(
|
||||
f"settings must be a chromadb.config.Settings instance or a dict, "
|
||||
f"got {type(value).__name__}"
|
||||
)
|
||||
|
||||
|
||||
# Type alias that skips Pydantic V2 validation for chromadb Settings (Pydantic V1 model)
|
||||
# and uses a before validator to handle dict-to-Settings conversion
|
||||
ChromaSettings = Annotated[
|
||||
SkipValidation[Settings],
|
||||
BeforeValidator(_coerce_settings),
|
||||
]
|
||||
|
||||
|
||||
def _default_settings() -> Settings:
|
||||
"""Create default ChromaDB settings.
|
||||
|
||||
@@ -64,14 +99,19 @@ def _default_embedding_function() -> ChromaEmbeddingFunctionWrapper:
|
||||
)
|
||||
|
||||
|
||||
@pyd_dataclass(frozen=True)
|
||||
@pyd_dataclass(frozen=True, config=ConfigDict(arbitrary_types_allowed=True))
|
||||
class ChromaDBConfig(BaseRagConfig):
|
||||
"""Configuration for ChromaDB client."""
|
||||
"""Configuration for ChromaDB client.
|
||||
|
||||
The settings field accepts either a chromadb.config.Settings instance
|
||||
or a dictionary of settings parameters. This handles the Pydantic V1/V2
|
||||
compatibility issue where ChromaDB uses Pydantic V1 for its Settings class.
|
||||
"""
|
||||
|
||||
provider: Literal["chromadb"] = field(default="chromadb", init=False)
|
||||
tenant: str = DEFAULT_TENANT
|
||||
database: str = DEFAULT_DATABASE
|
||||
settings: Settings = field(default_factory=_default_settings)
|
||||
settings: ChromaSettings = field(default_factory=_default_settings)
|
||||
embedding_function: ChromaEmbeddingFunctionWrapper = field(
|
||||
default_factory=_default_embedding_function
|
||||
)
|
||||
|
||||
130
lib/crewai/tests/rag/chromadb/test_config.py
Normal file
130
lib/crewai/tests/rag/chromadb/test_config.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Tests for ChromaDBConfig Pydantic V1/V2 compatibility."""
|
||||
|
||||
import pytest
|
||||
from chromadb.config import Settings
|
||||
|
||||
from crewai.rag.chromadb.config import ChromaDBConfig, _coerce_settings
|
||||
|
||||
|
||||
class TestCoerceSettings:
|
||||
"""Test suite for _coerce_settings validator function."""
|
||||
|
||||
def test_coerce_settings_passes_through_settings_instance(self):
|
||||
"""Test that existing Settings instances are passed through unchanged."""
|
||||
settings = Settings(
|
||||
persist_directory="./test_db",
|
||||
allow_reset=True,
|
||||
is_persistent=False,
|
||||
)
|
||||
result = _coerce_settings(settings)
|
||||
assert result is settings
|
||||
assert result.persist_directory == "./test_db"
|
||||
assert result.allow_reset is True
|
||||
assert result.is_persistent is False
|
||||
|
||||
def test_coerce_settings_converts_dict_to_settings(self):
|
||||
"""Test that dict inputs are converted to Settings instances."""
|
||||
settings_dict = {
|
||||
"persist_directory": "./my_custom_db",
|
||||
"allow_reset": True,
|
||||
"is_persistent": True,
|
||||
}
|
||||
result = _coerce_settings(settings_dict)
|
||||
assert isinstance(result, Settings)
|
||||
assert result.persist_directory == "./my_custom_db"
|
||||
assert result.allow_reset is True
|
||||
assert result.is_persistent is True
|
||||
|
||||
def test_coerce_settings_raises_type_error_for_invalid_input(self):
|
||||
"""Test that invalid input types raise TypeError."""
|
||||
with pytest.raises(TypeError, match="settings must be a chromadb.config.Settings"):
|
||||
_coerce_settings("invalid_string")
|
||||
|
||||
with pytest.raises(TypeError, match="settings must be a chromadb.config.Settings"):
|
||||
_coerce_settings(123)
|
||||
|
||||
with pytest.raises(TypeError, match="settings must be a chromadb.config.Settings"):
|
||||
_coerce_settings(["list", "of", "items"])
|
||||
|
||||
|
||||
class TestChromaDBConfigPydanticCompatibility:
|
||||
"""Test suite for ChromaDBConfig Pydantic V1/V2 compatibility.
|
||||
|
||||
These tests verify the fix for GitHub issue #4095:
|
||||
Pydantic V1/V2 Compatibility Crash in RagTool when passing custom ChromaDB Settings.
|
||||
"""
|
||||
|
||||
def test_chromadb_config_accepts_settings_instance(self):
|
||||
"""Test that ChromaDBConfig accepts a chromadb.config.Settings instance.
|
||||
|
||||
This is the main regression test for issue #4095 where passing a Settings
|
||||
instance would cause: TypeError: BaseModel.validate() takes 2 positional
|
||||
arguments but 3 were given
|
||||
"""
|
||||
custom_settings = Settings(
|
||||
persist_directory="./my_db",
|
||||
allow_reset=True,
|
||||
is_persistent=False,
|
||||
)
|
||||
config = ChromaDBConfig(settings=custom_settings)
|
||||
|
||||
assert config.settings is custom_settings
|
||||
assert config.settings.persist_directory == "./my_db"
|
||||
assert config.settings.allow_reset is True
|
||||
assert config.settings.is_persistent is False
|
||||
|
||||
def test_chromadb_config_accepts_settings_dict(self):
|
||||
"""Test that ChromaDBConfig accepts a dict for settings and converts it."""
|
||||
settings_dict = {
|
||||
"persist_directory": "./dict_db",
|
||||
"allow_reset": False,
|
||||
"is_persistent": True,
|
||||
}
|
||||
config = ChromaDBConfig(settings=settings_dict)
|
||||
|
||||
assert isinstance(config.settings, Settings)
|
||||
assert config.settings.persist_directory == "./dict_db"
|
||||
assert config.settings.allow_reset is False
|
||||
assert config.settings.is_persistent is True
|
||||
|
||||
def test_chromadb_config_uses_default_settings_when_not_provided(self):
|
||||
"""Test that ChromaDBConfig uses default settings when none provided."""
|
||||
config = ChromaDBConfig()
|
||||
|
||||
assert isinstance(config.settings, Settings)
|
||||
assert config.settings.allow_reset is True
|
||||
assert config.settings.is_persistent is True
|
||||
|
||||
def test_chromadb_config_with_all_parameters(self):
|
||||
"""Test ChromaDBConfig with all parameters including custom settings."""
|
||||
custom_settings = Settings(
|
||||
persist_directory="./full_test_db",
|
||||
allow_reset=True,
|
||||
is_persistent=True,
|
||||
)
|
||||
config = ChromaDBConfig(
|
||||
tenant="test_tenant",
|
||||
database="test_database",
|
||||
settings=custom_settings,
|
||||
limit=10,
|
||||
score_threshold=0.8,
|
||||
batch_size=50,
|
||||
)
|
||||
|
||||
assert config.tenant == "test_tenant"
|
||||
assert config.database == "test_database"
|
||||
assert config.settings is custom_settings
|
||||
assert config.limit == 10
|
||||
assert config.score_threshold == 0.8
|
||||
assert config.batch_size == 50
|
||||
|
||||
def test_chromadb_config_provider_is_chromadb(self):
|
||||
"""Test that provider field is always 'chromadb'."""
|
||||
config = ChromaDBConfig()
|
||||
assert config.provider == "chromadb"
|
||||
|
||||
def test_chromadb_config_is_frozen(self):
|
||||
"""Test that ChromaDBConfig is immutable (frozen)."""
|
||||
config = ChromaDBConfig()
|
||||
with pytest.raises(AttributeError):
|
||||
config.tenant = "new_tenant"
|
||||
Reference in New Issue
Block a user