feat: add custom embedding types and migrate providers

- introduce `BaseEmbeddingsProvider` and a helper for embedding functions
- add core embedding types and migrate the providers, factory, and storage modules
- remove unused type aliases and fix a Pydantic schema error
- update providers with env var support and related fixes
This commit is contained in:
Greyson LaLonde
2025-09-25 18:28:39 -04:00
committed by GitHub
parent e070c1400c
commit ce5ea9be6f
74 changed files with 2767 additions and 1308 deletions

View File

@@ -1,82 +0,0 @@
"""Test Azure embedder configuration with nested format only."""
from unittest.mock import MagicMock, patch
from crewai.rag.embeddings.configurator import EmbeddingConfigurator
class TestAzureEmbedderConfiguration:
    """Check how nested Azure settings are forwarded to the OpenAI embedding function.

    Both tests patch chromadb's ``OpenAIEmbeddingFunction`` and assert on the
    keyword arguments that ``EmbeddingConfigurator.configure_embedder`` passes
    to it, so no real embedding client is constructed.
    """

    @patch(
        "chromadb.utils.embedding_functions.openai_embedding_function.OpenAIEmbeddingFunction"
    )
    def test_azure_openai_with_nested_config(self, mock_openai_func):
        """Provider ``openai`` with an explicit ``api_type`` of ``azure``."""
        fake_embedder = MagicMock()
        mock_openai_func.return_value = fake_embedder

        nested_settings = {
            "api_key": "test-azure-key",
            "api_base": "https://test.openai.azure.com/",
            "api_type": "azure",
            "api_version": "2023-05-15",
            "model": "text-embedding-3-small",
            "deployment_id": "test-deployment",
        }
        embedder_config = {"provider": "openai", "config": nested_settings}

        result = EmbeddingConfigurator().configure_embedder(embedder_config)

        # "model" in the config is expected to arrive as "model_name"; options
        # that were not supplied are expected to be passed explicitly as None.
        expected_kwargs = {
            "api_key": "test-azure-key",
            "model_name": "text-embedding-3-small",
            "api_base": "https://test.openai.azure.com/",
            "api_type": "azure",
            "api_version": "2023-05-15",
            "default_headers": None,
            "dimensions": None,
            "deployment_id": "test-deployment",
            "organization_id": None,
        }
        mock_openai_func.assert_called_once_with(**expected_kwargs)
        assert result == fake_embedder

    @patch(
        "chromadb.utils.embedding_functions.openai_embedding_function.OpenAIEmbeddingFunction"
    )
    def test_azure_provider_with_nested_config(self, mock_openai_func):
        """Provider ``azure`` without an ``api_type`` key in the config."""
        fake_embedder = MagicMock()
        mock_openai_func.return_value = fake_embedder

        nested_settings = {
            "api_key": "test-azure-key",
            "api_base": "https://test.openai.azure.com/",
            "api_version": "2023-05-15",
            "model": "text-embedding-3-small",
            "deployment_id": "test-deployment",
        }
        embedder_config = {"provider": "azure", "config": nested_settings}

        result = EmbeddingConfigurator().configure_embedder(embedder_config)

        # The config carries no "api_type"; the call is still expected to
        # include api_type="azure".
        expected_kwargs = {
            "api_key": "test-azure-key",
            "api_base": "https://test.openai.azure.com/",
            "api_type": "azure",
            "api_version": "2023-05-15",
            "model_name": "text-embedding-3-small",
            "default_headers": None,
            "dimensions": None,
            "deployment_id": "test-deployment",
            "organization_id": None,
        }
        mock_openai_func.assert_called_once_with(**expected_kwargs)
        assert result == fake_embedder

View File

@@ -1,25 +0,0 @@
from unittest.mock import patch
import pytest
from crewai.rag.embeddings.configurator import EmbeddingConfigurator
def test_configure_embedder_importerror():
    """An ImportError raised by the patched embedding function reaches the caller.

    chromadb's ``OpenAIEmbeddingFunction`` is patched to raise ``ImportError``
    when called; the test expects ``configure_embedder`` to let that error
    through with its message intact after exactly one call to the patched
    function.
    """
    patch_target = 'chromadb.utils.embedding_functions.openai_embedding_function.OpenAIEmbeddingFunction'
    configurator = EmbeddingConfigurator()
    embedder_config = {
        'provider': 'openai',
        'config': {'model': 'text-embedding-ada-002'},
    }

    with patch(patch_target) as mock_openai:
        mock_openai.side_effect = ImportError("Module not found.")

        with pytest.raises(ImportError) as exc_info:
            configurator.configure_embedder(embedder_config)

        # These checks sit after the raises block: statements following the
        # raising call inside it would never execute.
        assert str(exc_info.value) == "Module not found."
        mock_openai.assert_called_once()