Implement set_knowledge method in BaseAgent to enable knowledge integration (fixes #2385)

Co-Authored-By: Joe Moura <joao@crewai.com>
2026-01-13 18:18:29 +00:00 · 2025-03-17 04:45:49 +00:00
parent 24f1a19310
commit e6fba64939
2 changed files with 116 additions and 2 deletions
--- a/src/crewai/agents/agent_builder/base_agent.py
+++ b/src/crewai/agents/agent_builder/base_agent.py
@@ -148,6 +148,10 @@ class BaseAgent(ABC, BaseModel):
        default=None,
        description="Custom knowledge storage for the agent.",
    )
+    embedder_config: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Configuration for embedding generation.",
+    )
    security_config: SecurityConfig = Field(
        default_factory=SecurityConfig,
        description="Security configuration for the agent, including fingerprinting.",
@@ -362,5 +366,46 @@ class BaseAgent(ABC, BaseModel):
            self._rpm_controller = rpm_controller
            self.create_agent_executor()

-    def set_knowledge(self, crew_embedder: Optional[Dict[str, Any]] = None):
-        pass
+    def set_knowledge(
+        self,
+        knowledge_sources: Optional[List[BaseKnowledgeSource]] = None,
+        embedder_config: Optional[Dict[str, Any]] = None,
+        crew_embedder: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Attach knowledge sources to the agent and build its Knowledge object.
+
+        Args:
+            knowledge_sources: Sources to attach; when omitted or empty, the
+                agent's current ``knowledge_sources`` are kept.
+            embedder_config: Embedder settings stored on the agent and passed
+                to ``Knowledge``; when omitted, the stored value is kept.
+            crew_embedder: Alias for ``embedder_config`` under the keyword the
+                previous signature used; ignored if ``embedder_config`` is set.
+
+        Raises:
+            ValueError: If ``knowledge_sources`` is not a list of
+                ``BaseKnowledgeSource`` instances, or if building the
+                ``Knowledge`` object raises ``TypeError``/``ValueError``.
+        """
+        try:
+            # Validate before assigning so a bad argument leaves the agent as-is.
+            if knowledge_sources:
+                if not isinstance(knowledge_sources, list):
+                    raise ValueError("Knowledge sources must be a list")
+
+                if not all(isinstance(k, BaseKnowledgeSource) for k in knowledge_sources):
+                    raise ValueError("All knowledge sources must be instances of BaseKnowledgeSource")
+                self.knowledge_sources = knowledge_sources
+
+            embedder_config = embedder_config or crew_embedder
+            if embedder_config:
+                self.embedder_config = embedder_config
+
+            if self.knowledge_sources:
+                self.knowledge = Knowledge(
+                    sources=self.knowledge_sources,
+                    embedder_config=self.embedder_config,
+                    collection_name=self.role.replace(" ", "_"),
+                )
+        except (TypeError, ValueError) as e:
+            raise ValueError(f"Invalid Knowledge Configuration: {e}") from e
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -1586,6 +1586,75 @@ def test_agent_execute_task_with_ollama():
    assert "AI" in result or "artificial intelligence" in result.lower()


+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_base_agent_set_knowledge():
+    """Test that set_knowledge stores the sources and builds a Knowledge object."""
+    from crewai.agents.agent_builder.base_agent import BaseAgent
+
+    # BaseAgent is an ABC, so exercise it through a minimal concrete subclass.
+    class TestAgent(BaseAgent):
+        def execute_task(self, task, context=None, tools=None):
+            return "Test execution"
+
+        def create_agent_executor(self, tools=None):
+            pass
+
+        def _parse_tools(self, tools):
+            return tools
+
+        def get_delegation_tools(self, agents):
+            return []
+
+        def get_output_converter(self, llm, text, model, instructions):
+            return None
+
+    # Create a knowledge source with some content
+    content = "The capital of France is Paris."
+    string_source = StringKnowledgeSource(content=content)
+
+    # Create an agent
+    agent = TestAgent(
+        role="Test Agent",
+        goal="Test Goal",
+        backstory="Test Backstory",
+    )
+
+    # Patch Knowledge where base_agent looks it up so no real object is built.
+    with patch("crewai.agents.agent_builder.base_agent.Knowledge") as MockKnowledge:
+        mock_knowledge_instance = MockKnowledge.return_value
+        mock_knowledge_instance.sources = [string_source]
+
+        # Sources only: they are stored and Knowledge is built exactly once.
+        agent.set_knowledge(knowledge_sources=[string_source])
+
+        assert agent.knowledge_sources == [string_source]
+        assert agent.knowledge is not None
+        MockKnowledge.assert_called_once()
+        # The collection name is the role with spaces replaced by underscores.
+        assert MockKnowledge.call_args[1]["collection_name"] == "Test_Agent"
+
+        # Test with embedder config
+        embedder_config = {
+            "provider": "openai",
+            "model": "text-embedding-3-small",
+        }
+
+        agent.set_knowledge(
+            knowledge_sources=[string_source],
+            embedder_config=embedder_config,
+        )
+
+        # The config is stored on the agent and forwarded to the new Knowledge.
+        assert agent.embedder_config == embedder_config
+        assert MockKnowledge.call_count == 2
+        assert MockKnowledge.call_args[1]["embedder_config"] == embedder_config
+
+    # Invalid entries are rejected by set_knowledge's own validation, outside
+    # the patch: the error must not depend on Knowledge being mocked.
+    with pytest.raises(ValueError, match="Invalid Knowledge Configuration"):
+        agent.set_knowledge(knowledge_sources=["invalid source"])
+
+
@pytest.mark.vcr(filter_headers=["authorization"])
 def test_agent_with_knowledge_sources():
    # Create a knowledge source with some content