feat: update ChromaDB embedding function to use OpenAI API (#3538)

- Refactor the default embedding function to use OpenAI's embedding function (`text-embedding-3-small`) with API key support, replacing ChromaDB's `DefaultEmbeddingFunction`.
- Import the OpenAI embedding function and configure it with the API key read from the `OPENAI_API_KEY` environment variable.
- Ensure compatibility with existing ChromaDB configuration model.
This commit is contained in:
Lorenze Jay
2025-09-18 14:50:35 -07:00
committed by GitHub
parent d4aa676195
commit 2f682e1564

View File

@@ -1,11 +1,11 @@
"""ChromaDB configuration model.""" """ChromaDB configuration model."""
import os
import warnings import warnings
from dataclasses import field from dataclasses import field
from typing import Literal, cast from typing import Literal, cast
from chromadb.config import Settings from chromadb.config import Settings
from chromadb.utils.embedding_functions import DefaultEmbeddingFunction
from pydantic.dataclasses import dataclass as pyd_dataclass from pydantic.dataclasses import dataclass as pyd_dataclass
from crewai.rag.chromadb.constants import ( from crewai.rag.chromadb.constants import (
@@ -49,7 +49,17 @@ def _default_embedding_function() -> ChromaEmbeddingFunctionWrapper:
Returns: Returns:
Default embedding function using all-MiniLM-L6-v2 via ONNX. Default embedding function using all-MiniLM-L6-v2 via ONNX.
""" """
return cast(ChromaEmbeddingFunctionWrapper, DefaultEmbeddingFunction()) from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
return cast(
ChromaEmbeddingFunctionWrapper,
OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"),
model_name="text-embedding-3-small",
),
)
@pyd_dataclass(frozen=True) @pyd_dataclass(frozen=True)