From e4063baca76945abc4cced4559a0b0948211b30e Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 14 May 2025 19:17:36 +0000
Subject: [PATCH] Fix issue #2832: Add VoyageAI embedding function implementation

Co-Authored-By: Joe Moura
---
Review note: voyageai_embedding_function.py was revised during review, so
the blob id ("index" line) for that file is omitted from its diff below.

 .../knowledge/embedder/chromadb/__init__.py   |  3 +
 .../embedder/chromadb/utils/__init__.py       |  3 +
 .../utils/embedding_functions/__init__.py     |  3 +
 .../voyageai_embedding_function.py            | 75 +++++++++++++++++++
 .../utilities/test_embedding_configurator.py  | 47 ++++++++++++
 5 files changed, 131 insertions(+)
 create mode 100644 src/crewai/knowledge/embedder/chromadb/__init__.py
 create mode 100644 src/crewai/knowledge/embedder/chromadb/utils/__init__.py
 create mode 100644 src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/__init__.py
 create mode 100644 src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/voyageai_embedding_function.py
 create mode 100644 tests/utilities/test_embedding_configurator.py

diff --git a/src/crewai/knowledge/embedder/chromadb/__init__.py b/src/crewai/knowledge/embedder/chromadb/__init__.py
new file mode 100644
index 000000000..50b61be50
--- /dev/null
+++ b/src/crewai/knowledge/embedder/chromadb/__init__.py
@@ -0,0 +1,3 @@
+from .utils.embedding_functions import VoyageAIEmbeddingFunction
+
+__all__ = ["VoyageAIEmbeddingFunction"]
diff --git a/src/crewai/knowledge/embedder/chromadb/utils/__init__.py b/src/crewai/knowledge/embedder/chromadb/utils/__init__.py
new file mode 100644
index 000000000..b1c267c29
--- /dev/null
+++ b/src/crewai/knowledge/embedder/chromadb/utils/__init__.py
@@ -0,0 +1,3 @@
+from . import embedding_functions
+
+__all__ = ["embedding_functions"]
diff --git a/src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/__init__.py b/src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/__init__.py
new file mode 100644
index 000000000..88a38d317
--- /dev/null
+++ b/src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/__init__.py
@@ -0,0 +1,3 @@
+from .voyageai_embedding_function import VoyageAIEmbeddingFunction
+
+__all__ = ["VoyageAIEmbeddingFunction"]
diff --git a/src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/voyageai_embedding_function.py b/src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/voyageai_embedding_function.py
new file mode 100644
--- /dev/null
+++ b/src/crewai/knowledge/embedder/chromadb/utils/embedding_functions/voyageai_embedding_function.py
@@ -0,0 +1,75 @@
+import logging
+from typing import List, Optional
+
+from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
+
+logger = logging.getLogger(__name__)
+
+
+def _import_voyageai():
+    """Import and return the ``voyageai`` module.
+
+    Raises:
+        ValueError: If the ``voyageai`` package is not installed.
+    """
+    try:
+        import voyageai
+    except ImportError as err:
+        raise ValueError(
+            "The voyageai python package is not installed. Please install it with `pip install voyageai`"
+        ) from err
+    return voyageai
+
+
+class VoyageAIEmbeddingFunction(EmbeddingFunction[Documents]):
+    """ChromaDB embedding function that embeds documents through VoyageAI."""
+
+    def __init__(self, api_key: str, model_name: str = "voyage-3"):
+        """Store the credentials and model used for embedding requests.
+
+        Args:
+            api_key: API key passed to VoyageAI on every embedding request.
+            model_name: Name of the VoyageAI embedding model to request.
+
+        Raises:
+            ValueError: If the ``voyageai`` package is not installed.
+        """
+        # Fail at construction time rather than on the first embedding call
+        # when the optional dependency is missing.
+        _import_voyageai()
+
+        self._api_key = api_key
+        self._model_name = model_name
+
+    def __call__(self, input: Documents) -> Embeddings:
+        """Embed ``input`` with the configured VoyageAI model.
+
+        Args:
+            input: Documents to embed. A bare string is wrapped in a list.
+
+        Returns:
+            Whatever ``voyageai.get_embeddings`` returns for ``input``, or an
+            empty list when ``input`` is empty.
+
+        Raises:
+            ValueError: If the ``voyageai`` package is not installed.
+            Exception: Errors from the VoyageAI call are logged and re-raised.
+        """
+        voyageai = _import_voyageai()
+
+        if not input:
+            return []
+
+        if isinstance(input, str):
+            input = [input]
+
+        try:
+            # NOTE(review): VoyageAI documents ``voyageai.Client(...).embed(...)``
+            # as its entry point; confirm that the installed SDK still exports
+            # the module-level ``get_embeddings`` helper used here.
+            return voyageai.get_embeddings(
+                input, model=self._model_name, api_key=self._api_key
+            )
+        except Exception as e:
+            logger.error("Error during VoyageAI embedding: %s", e)
+            raise
diff --git a/tests/utilities/test_embedding_configurator.py b/tests/utilities/test_embedding_configurator.py
new file mode 100644
index 000000000..a0446ab5a
--- /dev/null
+++ b/tests/utilities/test_embedding_configurator.py
@@ -0,0 +1,47 @@
+import os
+import pytest
+from unittest.mock import patch, MagicMock
+
+from crewai.utilities.embedding_configurator import EmbeddingConfigurator
+
+
+class TestEmbeddingConfigurator:
+    def test_configure_voyageai_embedder(self):
+        """Test that the VoyageAI embedder is configured correctly."""
+        with patch(
+            "crewai.utilities.embedding_configurator.VoyageAIEmbeddingFunction"
+        ) as mock_voyageai:
+            mock_instance = MagicMock()
+            mock_voyageai.return_value = mock_instance
+
+            config = {"api_key": "test-key"}
+            model_name = "voyage-3"
+
+            configurator = EmbeddingConfigurator()
+            embedder = configurator._configure_voyageai(config, model_name)
+
+            mock_voyageai.assert_called_once_with(
+                model_name=model_name, api_key="test-key"
+            )
+            assert embedder == mock_instance
+
+    def test_configure_embedder_with_voyageai(self):
+        """Test that the embedder configurator correctly handles VoyageAI provider."""
+        with patch(
+            "crewai.utilities.embedding_configurator.VoyageAIEmbeddingFunction"
+        ) as mock_voyageai:
+            mock_instance = MagicMock()
+            mock_voyageai.return_value = mock_instance
+
+            embedder_config = {
+                "provider": "voyageai",
+                "config": {"api_key": "test-key", "model": "voyage-3"},
+            }
+
+            configurator = EmbeddingConfigurator()
+            embedder = configurator.configure_embedder(embedder_config)
+
+            mock_voyageai.assert_called_once_with(
+                model_name="voyage-3", api_key="test-key"
+            )
+            assert embedder == mock_instance