mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-27 09:08:14 +00:00
fix: handle Pydantic V1/V2 compatibility in ChromaDBConfig
Fixes #4095 When users pass a chromadb.config.Settings object (Pydantic V1 model) to ChromaDBConfig (Pydantic V2 dataclass), Pydantic V2 would attempt to validate it and fail with: TypeError: BaseModel.validate() takes 2 positional arguments but 3 were given This fix: - Uses SkipValidation to prevent Pydantic V2 from validating the V1 Settings object - Uses BeforeValidator to handle dict-to-Settings conversion - Adds arbitrary_types_allowed=True to the config for extra safety The settings field now accepts either: - A chromadb.config.Settings instance (passed through unchanged) - A dictionary of settings parameters (converted to Settings internally) Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -2,10 +2,11 @@
|
|||||||
|
|
||||||
from dataclasses import field
|
from dataclasses import field
|
||||||
import os
|
import os
|
||||||
from typing import Literal, cast
|
from typing import Annotated, Any, Literal, cast
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from chromadb.config import Settings
|
from chromadb.config import Settings
|
||||||
|
from pydantic import BeforeValidator, ConfigDict, SkipValidation
|
||||||
from pydantic.dataclasses import dataclass as pyd_dataclass
|
from pydantic.dataclasses import dataclass as pyd_dataclass
|
||||||
|
|
||||||
from crewai.rag.chromadb.constants import (
|
from crewai.rag.chromadb.constants import (
|
||||||
@@ -31,6 +32,40 @@ warnings.filterwarnings(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_settings(value: Any) -> Settings:
|
||||||
|
"""Coerce input value to a chromadb.config.Settings instance.
|
||||||
|
|
||||||
|
This validator handles the Pydantic V1/V2 compatibility issue by:
|
||||||
|
- Passing through existing Settings objects without validation
|
||||||
|
- Converting dict inputs to Settings objects
|
||||||
|
|
||||||
|
Args:
|
||||||
|
value: Either a Settings instance or a dict of settings parameters.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A chromadb.config.Settings instance.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
TypeError: If value is neither a Settings instance nor a dict.
|
||||||
|
"""
|
||||||
|
if isinstance(value, Settings):
|
||||||
|
return value
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return Settings(**value)
|
||||||
|
raise TypeError(
|
||||||
|
f"settings must be a chromadb.config.Settings instance or a dict, "
|
||||||
|
f"got {type(value).__name__}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Type alias that skips Pydantic V2 validation for chromadb Settings (Pydantic V1 model)
|
||||||
|
# and uses a before validator to handle dict-to-Settings conversion
|
||||||
|
ChromaSettings = Annotated[
|
||||||
|
SkipValidation[Settings],
|
||||||
|
BeforeValidator(_coerce_settings),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def _default_settings() -> Settings:
|
def _default_settings() -> Settings:
|
||||||
"""Create default ChromaDB settings.
|
"""Create default ChromaDB settings.
|
||||||
|
|
||||||
@@ -64,14 +99,19 @@ def _default_embedding_function() -> ChromaEmbeddingFunctionWrapper:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@pyd_dataclass(frozen=True)
|
@pyd_dataclass(frozen=True, config=ConfigDict(arbitrary_types_allowed=True))
|
||||||
class ChromaDBConfig(BaseRagConfig):
|
class ChromaDBConfig(BaseRagConfig):
|
||||||
"""Configuration for ChromaDB client."""
|
"""Configuration for ChromaDB client.
|
||||||
|
|
||||||
|
The settings field accepts either a chromadb.config.Settings instance
|
||||||
|
or a dictionary of settings parameters. This handles the Pydantic V1/V2
|
||||||
|
compatibility issue where ChromaDB uses Pydantic V1 for its Settings class.
|
||||||
|
"""
|
||||||
|
|
||||||
provider: Literal["chromadb"] = field(default="chromadb", init=False)
|
provider: Literal["chromadb"] = field(default="chromadb", init=False)
|
||||||
tenant: str = DEFAULT_TENANT
|
tenant: str = DEFAULT_TENANT
|
||||||
database: str = DEFAULT_DATABASE
|
database: str = DEFAULT_DATABASE
|
||||||
settings: Settings = field(default_factory=_default_settings)
|
settings: ChromaSettings = field(default_factory=_default_settings)
|
||||||
embedding_function: ChromaEmbeddingFunctionWrapper = field(
|
embedding_function: ChromaEmbeddingFunctionWrapper = field(
|
||||||
default_factory=_default_embedding_function
|
default_factory=_default_embedding_function
|
||||||
)
|
)
|
||||||
|
|||||||
130
lib/crewai/tests/rag/chromadb/test_config.py
Normal file
130
lib/crewai/tests/rag/chromadb/test_config.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
"""Tests for ChromaDBConfig Pydantic V1/V2 compatibility."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from chromadb.config import Settings
|
||||||
|
|
||||||
|
from crewai.rag.chromadb.config import ChromaDBConfig, _coerce_settings
|
||||||
|
|
||||||
|
|
||||||
|
class TestCoerceSettings:
|
||||||
|
"""Test suite for _coerce_settings validator function."""
|
||||||
|
|
||||||
|
def test_coerce_settings_passes_through_settings_instance(self):
|
||||||
|
"""Test that existing Settings instances are passed through unchanged."""
|
||||||
|
settings = Settings(
|
||||||
|
persist_directory="./test_db",
|
||||||
|
allow_reset=True,
|
||||||
|
is_persistent=False,
|
||||||
|
)
|
||||||
|
result = _coerce_settings(settings)
|
||||||
|
assert result is settings
|
||||||
|
assert result.persist_directory == "./test_db"
|
||||||
|
assert result.allow_reset is True
|
||||||
|
assert result.is_persistent is False
|
||||||
|
|
||||||
|
def test_coerce_settings_converts_dict_to_settings(self):
|
||||||
|
"""Test that dict inputs are converted to Settings instances."""
|
||||||
|
settings_dict = {
|
||||||
|
"persist_directory": "./my_custom_db",
|
||||||
|
"allow_reset": True,
|
||||||
|
"is_persistent": True,
|
||||||
|
}
|
||||||
|
result = _coerce_settings(settings_dict)
|
||||||
|
assert isinstance(result, Settings)
|
||||||
|
assert result.persist_directory == "./my_custom_db"
|
||||||
|
assert result.allow_reset is True
|
||||||
|
assert result.is_persistent is True
|
||||||
|
|
||||||
|
def test_coerce_settings_raises_type_error_for_invalid_input(self):
|
||||||
|
"""Test that invalid input types raise TypeError."""
|
||||||
|
with pytest.raises(TypeError, match="settings must be a chromadb.config.Settings"):
|
||||||
|
_coerce_settings("invalid_string")
|
||||||
|
|
||||||
|
with pytest.raises(TypeError, match="settings must be a chromadb.config.Settings"):
|
||||||
|
_coerce_settings(123)
|
||||||
|
|
||||||
|
with pytest.raises(TypeError, match="settings must be a chromadb.config.Settings"):
|
||||||
|
_coerce_settings(["list", "of", "items"])
|
||||||
|
|
||||||
|
|
||||||
|
class TestChromaDBConfigPydanticCompatibility:
|
||||||
|
"""Test suite for ChromaDBConfig Pydantic V1/V2 compatibility.
|
||||||
|
|
||||||
|
These tests verify the fix for GitHub issue #4095:
|
||||||
|
Pydantic V1/V2 Compatibility Crash in RagTool when passing custom ChromaDB Settings.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_chromadb_config_accepts_settings_instance(self):
|
||||||
|
"""Test that ChromaDBConfig accepts a chromadb.config.Settings instance.
|
||||||
|
|
||||||
|
This is the main regression test for issue #4095 where passing a Settings
|
||||||
|
instance would cause: TypeError: BaseModel.validate() takes 2 positional
|
||||||
|
arguments but 3 were given
|
||||||
|
"""
|
||||||
|
custom_settings = Settings(
|
||||||
|
persist_directory="./my_db",
|
||||||
|
allow_reset=True,
|
||||||
|
is_persistent=False,
|
||||||
|
)
|
||||||
|
config = ChromaDBConfig(settings=custom_settings)
|
||||||
|
|
||||||
|
assert config.settings is custom_settings
|
||||||
|
assert config.settings.persist_directory == "./my_db"
|
||||||
|
assert config.settings.allow_reset is True
|
||||||
|
assert config.settings.is_persistent is False
|
||||||
|
|
||||||
|
def test_chromadb_config_accepts_settings_dict(self):
|
||||||
|
"""Test that ChromaDBConfig accepts a dict for settings and converts it."""
|
||||||
|
settings_dict = {
|
||||||
|
"persist_directory": "./dict_db",
|
||||||
|
"allow_reset": False,
|
||||||
|
"is_persistent": True,
|
||||||
|
}
|
||||||
|
config = ChromaDBConfig(settings=settings_dict)
|
||||||
|
|
||||||
|
assert isinstance(config.settings, Settings)
|
||||||
|
assert config.settings.persist_directory == "./dict_db"
|
||||||
|
assert config.settings.allow_reset is False
|
||||||
|
assert config.settings.is_persistent is True
|
||||||
|
|
||||||
|
def test_chromadb_config_uses_default_settings_when_not_provided(self):
|
||||||
|
"""Test that ChromaDBConfig uses default settings when none provided."""
|
||||||
|
config = ChromaDBConfig()
|
||||||
|
|
||||||
|
assert isinstance(config.settings, Settings)
|
||||||
|
assert config.settings.allow_reset is True
|
||||||
|
assert config.settings.is_persistent is True
|
||||||
|
|
||||||
|
def test_chromadb_config_with_all_parameters(self):
|
||||||
|
"""Test ChromaDBConfig with all parameters including custom settings."""
|
||||||
|
custom_settings = Settings(
|
||||||
|
persist_directory="./full_test_db",
|
||||||
|
allow_reset=True,
|
||||||
|
is_persistent=True,
|
||||||
|
)
|
||||||
|
config = ChromaDBConfig(
|
||||||
|
tenant="test_tenant",
|
||||||
|
database="test_database",
|
||||||
|
settings=custom_settings,
|
||||||
|
limit=10,
|
||||||
|
score_threshold=0.8,
|
||||||
|
batch_size=50,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert config.tenant == "test_tenant"
|
||||||
|
assert config.database == "test_database"
|
||||||
|
assert config.settings is custom_settings
|
||||||
|
assert config.limit == 10
|
||||||
|
assert config.score_threshold == 0.8
|
||||||
|
assert config.batch_size == 50
|
||||||
|
|
||||||
|
def test_chromadb_config_provider_is_chromadb(self):
|
||||||
|
"""Test that provider field is always 'chromadb'."""
|
||||||
|
config = ChromaDBConfig()
|
||||||
|
assert config.provider == "chromadb"
|
||||||
|
|
||||||
|
def test_chromadb_config_is_frozen(self):
|
||||||
|
"""Test that ChromaDBConfig is immutable (frozen)."""
|
||||||
|
config = ChromaDBConfig()
|
||||||
|
with pytest.raises(AttributeError):
|
||||||
|
config.tenant = "new_tenant"
|
||||||
Reference in New Issue
Block a user