From ef0c8a7b65ff7d53463e26cbf57eaa52371855af Mon Sep 17 00:00:00 2001
From: fzowl <zoltan@voyageai.com>
Date: Thu, 9 Jan 2025 19:29:18 +0100
Subject: [PATCH] Introducing VoyageAI's embedding models

---
 docs/concepts/knowledge.mdx                   | 13 ++++----
 docs/concepts/memory.mdx                      | 32 +++++++++++++++----
 docs/how-to/llm-connections.mdx               |  5 +--
 .../utilities/embedding_configurator.py       | 12 +++++++
 4 files changed, 48 insertions(+), 14 deletions(-)
diff --git a/docs/concepts/knowledge.mdx b/docs/concepts/knowledge.mdx
index 91110e19f..0faf38014 100644
--- a/docs/concepts/knowledge.mdx
+++ b/docs/concepts/knowledge.mdx
@@ -45,7 +45,7 @@ CrewAI supports various types of knowledge sources out of the box:
 ## Quickstart Example
 
 <Tip>
-For file-Based Knowledge Sources, make sure to place your files in a `knowledge` directory at the root of your project. 
+For file-based knowledge sources, make sure to place your files in a `knowledge` directory at the root of your project.
 Also, use relative paths from the `knowledge` directory when creating the source.
 </Tip>
 
@@ -91,7 +91,7 @@ result = crew.kickoff(inputs={"question": "What city does John live in and how o
 ```
 
 
-Here's another example with the `CrewDoclingSource`. The CrewDoclingSource is actually quite versatile and can handle multiple file formats including TXT, PDF, DOCX, HTML, and more. 
+Here's another example with the `CrewDoclingSource`. The CrewDoclingSource is actually quite versatile and can handle multiple file formats including TXT, PDF, DOCX, HTML, and more.
 
 ```python Code
 from crewai import LLM, Agent, Crew, Process, Task
@@ -253,7 +253,7 @@ crew = Crew(
 
 ### Chunking Configuration
 
-Knowledge sources automatically chunk content for better processing. 
+Knowledge sources automatically chunk content for better processing.
 You can configure chunking behavior in your knowledge sources:
 
 ```python
@@ -273,7 +273,7 @@ The chunking configuration helps in:
 
 ### Embeddings Configuration
 
-You can also configure the embedder for the knowledge store. 
+You can also configure the embedder for the knowledge store.
 This is useful if you want to use a different embedder for the knowledge store than the one used for the agents.
 The `embedder` parameter supports various embedding model providers that include:
 - `openai`: OpenAI's embedding models
@@ -282,6 +282,7 @@ The `embedder` parameter supports various embedding model providers that include
 - `ollama`: Local embeddings with Ollama
 - `vertexai`: Google Cloud VertexAI embeddings
 - `cohere`: Cohere's embedding models
+- `voyageai`: VoyageAI's embedding models
 - `bedrock`: AWS Bedrock embeddings
 - `huggingface`: Hugging Face models
 - `watson`: IBM Watson embeddings
@@ -347,7 +348,7 @@ result = crew.kickoff(inputs={"question": "What city does John live in and how o
 ## Task: Answer the following questions about the user: What city does John live in and how old is he?
 
 # Agent: About User
-## Final Answer: 
+## Final Answer:
 John is 30 years old and lives in San Francisco.
 ```
 </CodeGroup>
@@ -603,7 +604,7 @@ recent_news = SpaceNewsKnowledgeSource(
   </Accordion>
 
   <Accordion title="Performance Tips">
-    - Adjust chunk sizes based on content complexity 
+    - Adjust chunk sizes based on content complexity
     - Configure appropriate embedding models
     - Consider using local embedding providers for faster processing
   </Accordion>
diff --git a/docs/concepts/memory.mdx b/docs/concepts/memory.mdx
index b04b29c64..22667da56 100644
--- a/docs/concepts/memory.mdx
+++ b/docs/concepts/memory.mdx
@@ -6,8 +6,8 @@ icon: database
 
 ## Introduction to Memory Systems in CrewAI
 
-The crewAI framework introduces a sophisticated memory system designed to significantly enhance the capabilities of AI agents. 
-This system comprises `short-term memory`, `long-term memory`, `entity memory`, and `contextual memory`, each serving a unique purpose in aiding agents to remember, 
+The CrewAI framework introduces a sophisticated memory system designed to significantly enhance the capabilities of AI agents.
+This system comprises `short-term memory`, `long-term memory`, `entity memory`, and `contextual memory`, each serving a unique purpose in aiding agents to remember,
 reason, and learn from past interactions.
 
 ## Memory System Components
@@ -31,8 +31,8 @@ reason, and learn from past interactions.
 ## Implementing Memory in Your Crew
 
 When configuring a crew, you can enable and customize each memory component to suit the crew's objectives and the nature of tasks it will perform.
-By default, the memory system is disabled, and you can ensure it is active by setting `memory=True` in the crew configuration. 
-The memory will use OpenAI embeddings by default, but you can change it by setting `embedder` to a different model. 
+By default, the memory system is disabled, and you can ensure it is active by setting `memory=True` in the crew configuration.
+The memory will use OpenAI embeddings by default, but you can change it by setting `embedder` to a different model.
 It's also possible to initialize the memory instance with your own instance.
 
 The 'embedder' only applies to **Short-Term Memory** which uses Chroma for RAG.
@@ -95,7 +95,7 @@ my_crew = Crew(
 
 ## Integrating Mem0 for Enhanced User Memory
 
-[Mem0](https://mem0.ai/) is a self-improving memory layer for LLM applications, enabling personalized AI experiences. 
+[Mem0](https://mem0.ai/) is a self-improving memory layer for LLM applications, enabling personalized AI experiences.
 
 To include user-specific memory you can get your API key [here](https://app.mem0.ai/dashboard/api-keys) and refer the [docs](https://docs.mem0.ai/platform/quickstart#4-1-create-memories) for adding user preferences.
 
@@ -293,6 +293,26 @@ my_crew = Crew(
     }
 )
 ```
+### Using VoyageAI embeddings
+
+```python Code
+from crewai import Crew, Agent, Task, Process
+
+my_crew = Crew(
+    agents=[...],
+    tasks=[...],
+    process=Process.sequential,
+    memory=True,
+    verbose=True,
+    embedder={
+        "provider": "voyageai",
+        "config": {
+            "api_key": "YOUR_API_KEY",
+            "model": "<model_name>"
+        }
+    }
+)
+```
 ### Using HuggingFace embeddings
 
 ```python Code
@@ -363,5 +383,5 @@ crewai reset-memories [OPTIONS]
 
 ## Conclusion
 
-Integrating CrewAI's memory system into your projects is straightforward. By leveraging the provided memory components and configurations, 
+Integrating CrewAI's memory system into your projects is straightforward. By leveraging the provided memory components and configurations,
 you can quickly empower your agents with the ability to remember, reason, and learn from their interactions, unlocking new levels of intelligence and capability.
diff --git a/docs/how-to/llm-connections.mdx b/docs/how-to/llm-connections.mdx
index 25509c299..92f485fec 100644
--- a/docs/how-to/llm-connections.mdx
+++ b/docs/how-to/llm-connections.mdx
@@ -9,7 +9,7 @@ icon: brain-circuit
 CrewAI uses LiteLLM to connect to a wide variety of Language Models (LLMs). This integration provides extensive versatility, allowing you to use models from numerous providers with a simple, unified interface.
 
 <Note>
-    By default, CrewAI uses the `gpt-4o-mini` model. This is determined by the `OPENAI_MODEL_NAME` environment variable, which defaults to "gpt-4o-mini" if not set. 
+    By default, CrewAI uses the `gpt-4o-mini` model. This is determined by the `OPENAI_MODEL_NAME` environment variable, which defaults to "gpt-4o-mini" if not set.
     You can easily configure your agents to use a different model or provider as described in this guide.
 </Note>
 
@@ -23,6 +23,7 @@ LiteLLM supports a wide range of providers, including but not limited to:
 - Azure OpenAI
 - AWS (Bedrock, SageMaker)
 - Cohere
+- VoyageAI
 - Hugging Face
 - Ollama
 - Mistral AI
@@ -168,7 +169,7 @@ For local models like those provided by Ollama:
 
 You can change the base API URL for any LLM provider by setting the `base_url` parameter:
 
-```python Code  
+```python Code
 llm = LLM(
     model="custom-model-name",
     base_url="https://api.your-provider.com/v1",
diff --git a/src/crewai/utilities/embedding_configurator.py b/src/crewai/utilities/embedding_configurator.py
index 44e832ec2..71965bf53 100644
--- a/src/crewai/utilities/embedding_configurator.py
+++ b/src/crewai/utilities/embedding_configurator.py
@@ -14,6 +14,7 @@ class EmbeddingConfigurator:
             "vertexai": self._configure_vertexai,
             "google": self._configure_google,
             "cohere": self._configure_cohere,
+            "voyageai": self._configure_voyageai,
             "bedrock": self._configure_bedrock,
             "huggingface": self._configure_huggingface,
             "watson": self._configure_watson,
@@ -124,6 +125,17 @@ class EmbeddingConfigurator:
             api_key=config.get("api_key"),
         )
 
+    @staticmethod
+    def _configure_voyageai(config, model_name):  # Builds and returns a chromadb VoyageAIEmbeddingFunction.
+        from chromadb.utils.embedding_functions.voyageai_embedding_function import (  # function-local import: executed only when this method is called
+            VoyageAIEmbeddingFunction,
+        )
+
+        return VoyageAIEmbeddingFunction(
+            model_name=model_name,  # passed through unchanged from the caller
+            api_key=config.get("api_key"),  # assumes config is dict-like (TODO confirm); a dict's .get yields None when "api_key" is absent
+        )
+
     @staticmethod
     def _configure_bedrock(config, model_name):
         from chromadb.utils.embedding_functions.amazon_bedrock_embedding_function import (