Fix issue #3764: Implement lazy loading for knowledge sources

This commit fixes a bug where knowledge sources were being loaded eagerly
during agent/crew initialization, causing authentication errors (401) when
users didn't have proper credentials configured.

Changes:
- Modified Knowledge class to use lazy loading pattern
- Added _sources_loaded private attribute to track loading state
- Knowledge sources are now loaded only when first queried
- Removed eager add_sources() calls from agent.set_knowledge() and crew.create_crew_knowledge()
- Added comprehensive tests for lazy loading behavior

The fix ensures that:
1. Knowledge sources don't require authentication during initialization
2. Sources are loaded on-demand when actually needed (first query)
3. Subsequent queries don't reload sources
4. Explicit add_sources() calls still work as expected

Fixes #3764

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2025-10-21 20:23:17 +00:00
parent f6e13eb890
commit e94de13f06
4 changed files with 144 additions and 3 deletions

View File

@@ -239,7 +239,6 @@ class Agent(BaseAgent):
embedder=self.embedder,
collection_name=self.role,
)
self.knowledge.add_sources()
except (TypeError, ValueError) as e:
raise ValueError(f"Invalid Knowledge Configuration: {e!s}") from e

View File

@@ -371,7 +371,6 @@ class Crew(FlowTrackable, BaseModel):
embedder=self.embedder,
collection_name="crew",
)
self.knowledge.add_sources()
except Exception as e:
self._logger.log(

View File

@@ -1,6 +1,6 @@
import os
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
@@ -25,6 +25,7 @@ class Knowledge(BaseModel):
storage: KnowledgeStorage | None = Field(default=None)
embedder: EmbedderConfig | None = None
collection_name: str | None = None
_sources_loaded: bool = PrivateAttr(default=False)
def __init__(
self,
@@ -56,6 +57,10 @@ class Knowledge(BaseModel):
if self.storage is None:
raise ValueError("Storage is not initialized.")
if not self._sources_loaded:
self.add_sources()
self._sources_loaded = True
return self.storage.search(
query,
limit=results_limit,
@@ -67,6 +72,7 @@ class Knowledge(BaseModel):
for source in self.sources:
source.storage = self.storage
source.add()
self._sources_loaded = True
except Exception as e:
raise e

View File

@@ -0,0 +1,137 @@
"""Test lazy loading of knowledge sources to prevent premature authentication errors."""
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from crewai import Agent, Crew, Task
from crewai.knowledge.knowledge import Knowledge
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
def test_knowledge_sources_not_loaded_during_initialization(tmpdir):
"""Test that knowledge sources are not loaded during agent/crew initialization."""
# Create a test file
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Test content")
# Create knowledge source
knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
# Mock the storage to avoid actual database operations
with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
# Create Knowledge object
knowledge = Knowledge(
collection_name="test",
sources=[knowledge_source],
embedder=None
)
# Verify that sources are not loaded yet
assert knowledge._sources_loaded is False
def test_knowledge_sources_loaded_on_first_query(tmpdir):
"""Test that knowledge sources are loaded only when first queried."""
# Create a test file
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Test content")
# Create knowledge source
knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
# Mock the storage to avoid actual database operations
with patch('crewai.knowledge.knowledge.KnowledgeStorage') as MockStorage:
mock_storage = MagicMock()
mock_storage.search.return_value = []
MockStorage.return_value = mock_storage
# Create Knowledge object
knowledge = Knowledge(
collection_name="test",
sources=[knowledge_source],
embedder=None
)
# Verify sources not loaded yet
assert knowledge._sources_loaded is False
with patch.object(Knowledge, 'add_sources', wraps=knowledge.add_sources) as mock_add_sources:
# Query should trigger loading
knowledge.query(["test query"])
# Verify add_sources was called
mock_add_sources.assert_called_once()
# Verify sources are now marked as loaded
assert knowledge._sources_loaded is True
# Query again - add_sources should not be called again
with patch.object(Knowledge, 'add_sources', wraps=knowledge.add_sources) as mock_add_sources:
knowledge.query(["another query"])
mock_add_sources.assert_not_called()
def test_agent_with_knowledge_sources_no_immediate_loading(tmpdir):
"""Test that creating an agent with knowledge sources doesn't immediately load them."""
# Create a test file
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Test content")
# Create knowledge source
knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
# Mock the storage to avoid authentication errors
with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
# Create agent with knowledge source
agent = Agent(
role="Test Agent",
goal="Test goal",
backstory="Test backstory",
knowledge_sources=[knowledge_source],
)
# Create task and crew
task = Task(
description="Test task",
expected_output="Test output",
agent=agent
)
crew = Crew(
agents=[agent],
tasks=[task],
)
# but sources should not be loaded yet
if agent.knowledge is not None:
assert agent.knowledge._sources_loaded is False
def test_knowledge_add_sources_can_still_be_called_explicitly():
"""Test that add_sources can still be called explicitly if needed."""
# Create a mock knowledge source
mock_source = MagicMock()
mock_source.add = MagicMock()
# Mock the storage
with patch('crewai.knowledge.knowledge.KnowledgeStorage') as MockStorage:
mock_storage = MagicMock()
MockStorage.return_value = mock_storage
# Create Knowledge object
knowledge = Knowledge(
collection_name="test",
sources=[mock_source],
embedder=None
)
# Explicitly call add_sources
knowledge.add_sources()
# Verify add was called
mock_source.add.assert_called_once()
# Verify sources are marked as loaded
assert knowledge._sources_loaded is True