From f02db1a4f52d359a24dd1df8a365e744f4cc67ae Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 19 Feb 2025 15:23:53 +0000 Subject: [PATCH] fix: Agent-level knowledge sources with non-OpenAI embedders - Remove OpenAI default from KnowledgeStorage - Add proper embedder config inheritance from crew to agent - Improve error messaging for missing embedder config - Add tests for agent-level knowledge sources Fixes #2164 Co-Authored-By: Joe Moura --- src/crewai/agent.py | 9 +- src/crewai/crew.py | 4 +- src/crewai/knowledge/knowledge.py | 9 +- .../knowledge/storage/knowledge_storage.py | 21 ++--- tests/test_agent_knowledge.py | 94 +++++++++++++++++++ 5 files changed, 118 insertions(+), 19 deletions(-) create mode 100644 tests/test_agent_knowledge.py diff --git a/src/crewai/agent.py b/src/crewai/agent.py index 4c1e3c393..8bb1176c9 100644 --- a/src/crewai/agent.py +++ b/src/crewai/agent.py @@ -115,7 +115,7 @@ class Agent(BaseAgent): default="safe", description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).", ) - embedder: Optional[Dict[str, Any]] = Field( + embedder_config: Optional[Dict[str, Any]] = Field( default=None, description="Embedder configuration for the agent.", ) @@ -150,9 +150,14 @@ class Agent(BaseAgent): if isinstance(self.knowledge_sources, list) and all( isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources ): + # Use agent's embedder config if provided, otherwise use crew's + embedder_config = self.embedder_config + if not embedder_config and self.crew: + embedder_config = self.crew.embedder_config + self.knowledge = Knowledge( sources=self.knowledge_sources, - embedder=self.embedder, + embedder_config=embedder_config, collection_name=knowledge_agent_name, storage=self.knowledge_storage or None, ) diff --git a/src/crewai/crew.py b/src/crewai/crew.py index 682d5d60b..9dcccdbf2 100644 --- a/src/crewai/crew.py +++ b/src/crewai/crew.py @@ -138,7 +138,7 @@ class Crew(BaseModel): default=None, description="An instance of the UserMemory to be used by the Crew to store/fetch memories of a specific user.", ) - embedder: Optional[dict] = Field( + embedder_config: Optional[dict] = Field( default=None, description="Configuration for the embedder to be used for the crew.", ) @@ -308,7 +308,7 @@ class Crew(BaseModel): ): self.knowledge = Knowledge( sources=self.knowledge_sources, - embedder=self.embedder, + embedder_config=self.embedder_config, collection_name="crew", ) diff --git a/src/crewai/knowledge/knowledge.py b/src/crewai/knowledge/knowledge.py index da1db90a8..a15c044c7 100644 --- a/src/crewai/knowledge/knowledge.py +++ b/src/crewai/knowledge/knowledge.py @@ -15,29 +15,30 @@ class Knowledge(BaseModel): Args: sources: List[BaseKnowledgeSource] = Field(default_factory=list) storage: Optional[KnowledgeStorage] = Field(default=None) - embedder: Optional[Dict[str, Any]] = None + embedder_config: Optional[Dict[str, Any]] = None """ sources: List[BaseKnowledgeSource] = Field(default_factory=list) model_config = ConfigDict(arbitrary_types_allowed=True) storage: Optional[KnowledgeStorage] = Field(default=None) - embedder: Optional[Dict[str, Any]] = None + embedder_config: Optional[Dict[str, Any]] = None collection_name: Optional[str] = None def __init__( self, collection_name: str, sources: List[BaseKnowledgeSource], - embedder: Optional[Dict[str, Any]] = None, + embedder_config: Optional[Dict[str, Any]] = None, storage: Optional[KnowledgeStorage] = None, **data, ): super().__init__(**data) + self.embedder_config = embedder_config if storage: self.storage = storage else: self.storage = KnowledgeStorage( - embedder=embedder, collection_name=collection_name + embedder_config=embedder_config, collection_name=collection_name ) self.sources = sources self.storage.initialize_knowledge_storage() diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py index 9e6ab8041..f9e4bbdf3 100644 --- a/src/crewai/knowledge/storage/knowledge_storage.py +++ b/src/crewai/knowledge/storage/knowledge_storage.py @@ -48,11 +48,11 @@ class KnowledgeStorage(BaseKnowledgeStorage): def __init__( self, - embedder: Optional[Dict[str, Any]] = None, + embedder_config: Optional[Dict[str, Any]] = None, collection_name: Optional[str] = None, ): self.collection_name = collection_name - self._set_embedder_config(embedder) + self._set_embedder_config(embedder_config) def search( self, @@ -179,15 +179,14 @@ class KnowledgeStorage(BaseKnowledgeStorage): raise def _create_default_embedding_function(self): - from chromadb.utils.embedding_functions.openai_embedding_function import ( - OpenAIEmbeddingFunction, + raise ValueError( + "No embedder configuration provided. Please provide an embedder configuration " + "either at the crew level or agent level. You can configure embeddings using " + "the 'embedder_config' parameter with providers like 'openai', 'watson', etc. " + "Example: embedder_config={'provider': 'openai', 'config': {'api_key': 'your-key'}}" ) - return OpenAIEmbeddingFunction( - api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" - ) - - def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None: + def _set_embedder_config(self, embedder_config: Optional[Dict[str, Any]] = None) -> None: """Set the embedding configuration for the knowledge storage. Args: @@ -195,7 +194,7 @@ class KnowledgeStorage(BaseKnowledgeStorage): If None or empty, defaults to the default embedding function. """ self.embedder = ( - EmbeddingConfigurator().configure_embedder(embedder) - if embedder + EmbeddingConfigurator().configure_embedder(embedder_config) + if embedder_config else self._create_default_embedding_function() ) diff --git a/tests/test_agent_knowledge.py b/tests/test_agent_knowledge.py new file mode 100644 index 000000000..ff9e02214 --- /dev/null +++ b/tests/test_agent_knowledge.py @@ -0,0 +1,94 @@ +import pytest +from unittest.mock import patch, MagicMock + +from chromadb.api.types import EmbeddingFunction +from crewai import Agent, Crew, Task +from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource +from crewai.process import Process + +class MockEmbeddingFunction(EmbeddingFunction): + def __call__(self, texts): + return [[0.0] * 1536 for _ in texts] + +@pytest.fixture(autouse=True) +def mock_vector_db(): + """Mock vector database operations.""" + with patch("crewai.knowledge.storage.knowledge_storage.KnowledgeStorage") as mock, \ + patch("chromadb.PersistentClient") as mock_chroma: + # Mock ChromaDB client and collection + mock_collection = MagicMock() + mock_collection.query.return_value = { + "ids": [["1"]], + "distances": [[0.1]], + "metadatas": [[{"source": "test"}]], + "documents": [["Test content"]] + } + mock_chroma.return_value.get_or_create_collection.return_value = mock_collection + + # Mock the query method to return a predefined response + instance = mock.return_value + instance.query.return_value = [ + { + "context": "Test content", + "score": 0.9, + } + ] + instance.reset.return_value = None + yield instance + +def test_agent_knowledge_with_custom_embedder(mock_vector_db): + agent = Agent( + role="test role", + goal="test goal", + backstory="test backstory", + knowledge_sources=[StringKnowledgeSource(content="test content")], + embedder_config={ + "provider": "custom", + "config": { + "embedder": MockEmbeddingFunction() + } + } + ) + assert agent.knowledge is not None + assert agent.knowledge.storage.embedder is not None + +def test_agent_inherits_crew_embedder(mock_vector_db): + test_agent = Agent( + role="test role", + goal="test goal", + backstory="test backstory" + ) + test_task = Task( + description="test task", + expected_output="test output", + agent=test_agent + ) + crew = Crew( + agents=[test_agent], + tasks=[test_task], + process=Process.sequential, + embedder_config={ + "provider": "custom", + "config": { + "embedder": MockEmbeddingFunction() + } + } + ) + agent = Agent( + role="test role", + goal="test goal", + backstory="test backstory", + knowledge_sources=[StringKnowledgeSource(content="test content")], + crew=crew + ) + assert agent.knowledge is not None + assert agent.knowledge.storage.embedder is not None + +def test_agent_knowledge_without_embedder_raises_error(mock_vector_db): + with pytest.raises(ValueError, match="No embedder configuration provided"): + agent = Agent( + role="test role", + goal="test goal", + backstory="test backstory", + knowledge_sources=[StringKnowledgeSource(content="test content")] + )