From e6fba64939214ed9c7d01e76f832e0c07c540da5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 17 Mar 2025 04:45:49 +0000
Subject: [PATCH] Implement set_knowledge method in BaseAgent to enable
 knowledge integration (fixes #2385)

Co-Authored-By: Joe Moura <joao@crewai.com>
---
 src/crewai/agents/agent_builder/base_agent.py | 49 ++++++++++++-
 tests/agent_test.py                           | 69 +++++++++++++++++++
 2 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/src/crewai/agents/agent_builder/base_agent.py b/src/crewai/agents/agent_builder/base_agent.py
index 47515d087..7a178ba6b 100644
--- a/src/crewai/agents/agent_builder/base_agent.py
+++ b/src/crewai/agents/agent_builder/base_agent.py
@@ -148,6 +148,10 @@ class BaseAgent(ABC, BaseModel):
         default=None,
         description="Custom knowledge storage for the agent.",
     )
+    embedder_config: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Configuration for embedding generation.",
+    )
     security_config: SecurityConfig = Field(
         default_factory=SecurityConfig,
         description="Security configuration for the agent, including fingerprinting.",
@@ -362,5 +366,46 @@ class BaseAgent(ABC, BaseModel):
             self._rpm_controller = rpm_controller
             self.create_agent_executor()
 
-    def set_knowledge(self, crew_embedder: Optional[Dict[str, Any]] = None):
-        pass
+    def set_knowledge(
+        self,
+        knowledge_sources: Optional[List[BaseKnowledgeSource]] = None,
+        embedder_config: Optional[Dict[str, Any]] = None,
+        crew_embedder: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Set the agent's knowledge sources and build its Knowledge object.
+
+        Args:
+            knowledge_sources: Sources to attach. When omitted or empty, the
+                sources already stored on the agent are used.
+            embedder_config: Embedder configuration to store on the agent.
+            crew_embedder: Fallback used only when the agent has no embedder
+                configuration; keeps ``set_knowledge(crew_embedder=...)`` calls
+                written against the previous signature working.
+
+        Raises:
+            ValueError: If ``knowledge_sources`` is not a list of
+                BaseKnowledgeSource instances, or if building the Knowledge
+                object raises TypeError or ValueError.
+        """
+        try:
+            if knowledge_sources:
+                if not isinstance(knowledge_sources, list):
+                    raise ValueError("Knowledge sources must be a list")
+                if not all(isinstance(k, BaseKnowledgeSource) for k in knowledge_sources):
+                    raise ValueError("All knowledge sources must be instances of BaseKnowledgeSource")
+                self.knowledge_sources = knowledge_sources
+
+            if embedder_config:
+                self.embedder_config = embedder_config
+            elif crew_embedder and not self.embedder_config:
+                self.embedder_config = crew_embedder
+
+            if self.knowledge_sources:
+                self.knowledge = Knowledge(
+                    sources=self.knowledge_sources,
+                    embedder_config=self.embedder_config,
+                    collection_name=self.role.replace(" ", "_"),
+                )
+        except (TypeError, ValueError) as e:
+            # Chain the cause so the original traceback stays visible.
+            raise ValueError(f"Invalid Knowledge Configuration: {e}") from e
diff --git a/tests/agent_test.py b/tests/agent_test.py
index b5b3aae93..f5d2ecd69 100644
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -1586,6 +1586,75 @@ def test_agent_execute_task_with_ollama():
     assert "AI" in result or "artificial intelligence" in result.lower()
 
 
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_base_agent_set_knowledge():
+    """Test that set_knowledge stores the sources and builds a Knowledge object.
+
+    Knowledge is patched in the base_agent module, so nothing is embedded or
+    persisted while the test runs.
+    """
+    from crewai.agents.agent_builder.base_agent import BaseAgent
+
+    # Minimal concrete subclass so the base-class method can be exercised.
+    class TestAgent(BaseAgent):
+        def execute_task(self, task, context=None, tools=None):
+            return "Test execution"
+
+        def create_agent_executor(self, tools=None):
+            pass
+
+        def _parse_tools(self, tools):
+            return tools
+
+        def get_delegation_tools(self, agents):
+            return []
+
+        def get_output_converter(self, llm, text, model, instructions):
+            return None
+
+    string_source = StringKnowledgeSource(content="The capital of France is Paris.")
+    agent = TestAgent(
+        role="Test Agent",
+        goal="Test Goal",
+        backstory="Test Backstory",
+    )
+
+    # Patch Knowledge where set_knowledge looks it up to avoid API calls.
+    with patch("crewai.agents.agent_builder.base_agent.Knowledge") as MockKnowledge:
+        # Without any sources there is nothing to build.
+        agent.set_knowledge()
+        MockKnowledge.assert_not_called()
+
+        agent.set_knowledge(knowledge_sources=[string_source])
+
+        assert agent.knowledge_sources == [string_source]
+        assert agent.knowledge is not None
+        MockKnowledge.assert_called_once()
+        assert MockKnowledge.call_args[1]["sources"] == [string_source]
+        assert MockKnowledge.call_args[1]["collection_name"] == "Test_Agent"
+
+        # A supplied embedder config is stored and forwarded to Knowledge.
+        embedder_config = {
+            "provider": "openai",
+            "model": "text-embedding-3-small",
+        }
+        agent.set_knowledge(
+            knowledge_sources=[string_source],
+            embedder_config=embedder_config,
+        )
+
+        assert agent.embedder_config == embedder_config
+        assert MockKnowledge.call_args[1]["embedder_config"] == embedder_config
+
+    # Validation fails before Knowledge is constructed, so no patch is needed.
+    with pytest.raises(ValueError, match="Invalid Knowledge Configuration"):
+        agent.set_knowledge(knowledge_sources=["invalid source"])
+
+    # A truthy non-list container is rejected as well.
+    with pytest.raises(ValueError, match="Knowledge sources must be a list"):
+        agent.set_knowledge(knowledge_sources=(string_source,))
+
+
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_agent_with_knowledge_sources():
     # Create a knowledge source with some content