diff --git a/lib/crewai/src/crewai/agent.py b/lib/crewai/src/crewai/agent.py index 1b1383169..3a273655b 100644 --- a/lib/crewai/src/crewai/agent.py +++ b/lib/crewai/src/crewai/agent.py @@ -239,7 +239,6 @@ class Agent(BaseAgent): embedder=self.embedder, collection_name=self.role, ) - self.knowledge.add_sources() except (TypeError, ValueError) as e: raise ValueError(f"Invalid Knowledge Configuration: {e!s}") from e diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py index a8e88ce55..22dc18035 100644 --- a/lib/crewai/src/crewai/crew.py +++ b/lib/crewai/src/crewai/crew.py @@ -371,7 +371,6 @@ class Crew(FlowTrackable, BaseModel): embedder=self.embedder, collection_name="crew", ) - self.knowledge.add_sources() except Exception as e: self._logger.log( diff --git a/lib/crewai/src/crewai/knowledge/knowledge.py b/lib/crewai/src/crewai/knowledge/knowledge.py index cb53ab3d6..5a0930f02 100644 --- a/lib/crewai/src/crewai/knowledge/knowledge.py +++ b/lib/crewai/src/crewai/knowledge/knowledge.py @@ -1,6 +1,6 @@ import os -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage @@ -25,6 +25,7 @@ class Knowledge(BaseModel): storage: KnowledgeStorage | None = Field(default=None) embedder: EmbedderConfig | None = None collection_name: str | None = None + _sources_loaded: bool = PrivateAttr(default=False) def __init__( self, @@ -56,6 +57,10 @@ class Knowledge(BaseModel): if self.storage is None: raise ValueError("Storage is not initialized.") + if not self._sources_loaded: + self.add_sources() + self._sources_loaded = True + return self.storage.search( query, limit=results_limit, @@ -67,6 +72,7 @@ class Knowledge(BaseModel): for source in self.sources: source.storage = self.storage source.add() + self._sources_loaded = True except Exception as e: raise e 
diff --git a/lib/crewai/tests/knowledge/test_lazy_loading.py b/lib/crewai/tests/knowledge/test_lazy_loading.py
new file mode 100644
index 000000000..4869373ef
--- /dev/null
+++ b/lib/crewai/tests/knowledge/test_lazy_loading.py
@@ -0,0 +1,145 @@
+"""Test lazy loading of knowledge sources to prevent premature authentication errors."""
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from crewai import Agent, Crew, Task
+from crewai.knowledge.knowledge import Knowledge
+from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
+
+
+def test_knowledge_sources_not_loaded_during_initialization(tmpdir):
+    """Test that knowledge sources are not loaded during agent/crew initialization."""
+    # Create a test file
+    test_file = Path(tmpdir) / "test.txt"
+    test_file.write_text("Test content")
+
+    # Create knowledge source
+    knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
+
+    # Mock the storage to avoid actual database operations
+    with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
+        # Create Knowledge object
+        knowledge = Knowledge(
+            collection_name="test",
+            sources=[knowledge_source],
+            embedder=None
+        )
+
+        # Verify that sources are not loaded yet
+        assert knowledge._sources_loaded is False
+
+
+def test_knowledge_sources_loaded_on_first_query(tmpdir):
+    """Test that knowledge sources are loaded only when first queried."""
+    # Create a test file
+    test_file = Path(tmpdir) / "test.txt"
+    test_file.write_text("Test content")
+
+    # Create knowledge source
+    knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
+
+    # Mock the storage to avoid actual database operations
+    with patch('crewai.knowledge.knowledge.KnowledgeStorage') as MockStorage:
+        mock_storage = MagicMock()
+        mock_storage.search.return_value = []
+        MockStorage.return_value = mock_storage
+
+        # Create Knowledge object
+        knowledge = Knowledge(
+            collection_name="test",
+            sources=[knowledge_source],
+            embedder=None
+        )
+
+        # Verify sources not loaded yet
+        assert knowledge._sources_loaded is False
+
+        # Spy on add_sources while still running the real method via wraps
+        with patch.object(Knowledge, 'add_sources', wraps=knowledge.add_sources) as mock_add_sources:
+            # Query should trigger loading
+            knowledge.query(["test query"])
+
+            # Verify add_sources was called
+            mock_add_sources.assert_called_once()
+
+        # Verify sources are now marked as loaded
+        assert knowledge._sources_loaded is True
+
+        # Query again - add_sources should not be called again
+        with patch.object(Knowledge, 'add_sources', wraps=knowledge.add_sources) as mock_add_sources:
+            knowledge.query(["another query"])
+            mock_add_sources.assert_not_called()
+
+
+def test_agent_with_knowledge_sources_no_immediate_loading(tmpdir):
+    """Test that creating an agent with knowledge sources doesn't immediately load them."""
+    # Create a test file
+    test_file = Path(tmpdir) / "test.txt"
+    test_file.write_text("Test content")
+
+    # Create knowledge source
+    knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
+
+    # Mock the storage to avoid authentication errors
+    with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
+        # Spy on the source's add() so the test fails if construction ingests
+        # the source, even when agent.knowledge has not been created yet.
+        with patch.object(TextFileKnowledgeSource, 'add') as mock_add:
+            # Create agent with knowledge source
+            agent = Agent(
+                role="Test Agent",
+                goal="Test goal",
+                backstory="Test backstory",
+                knowledge_sources=[knowledge_source],
+            )
+
+            # Create task and crew
+            task = Task(
+                description="Test task",
+                expected_output="Test output",
+                agent=agent
+            )
+
+            # Construct the crew too; the instance itself is not needed afterwards
+            Crew(
+                agents=[agent],
+                tasks=[task],
+            )
+
+            # Unconditional check: no source was ingested during construction
+            mock_add.assert_not_called()
+
+        # If a Knowledge object already exists, it must still be marked unloaded
+        if agent.knowledge is not None:
+            assert agent.knowledge._sources_loaded is False
+
+
+def test_knowledge_add_sources_can_still_be_called_explicitly():
+    """Test that add_sources can still be called explicitly if needed."""
+    # Create a mock knowledge source
+    mock_source = MagicMock()
+    mock_source.add = MagicMock()
+
+    # Mock the storage
+    with patch('crewai.knowledge.knowledge.KnowledgeStorage') as MockStorage:
+        mock_storage = MagicMock()
+        MockStorage.return_value = mock_storage
+
+        # Create Knowledge object
+        knowledge = Knowledge(
+            collection_name="test",
+            sources=[mock_source],
+            embedder=None
+        )
+
+        # Explicitly call add_sources
+        knowledge.add_sources()
+
+        # Verify add was called
+        mock_source.add.assert_called_once()
+
+        # Verify sources are marked as loaded
+        assert knowledge._sources_loaded is True