mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 04:18:35 +00:00
Fix issue #3764: Implement lazy loading for knowledge sources
This commit fixes a bug where knowledge sources were being loaded eagerly during agent/crew initialization, causing authentication errors (401) when users didn't have proper credentials configured. Changes: - Modified Knowledge class to use lazy loading pattern - Added _sources_loaded private attribute to track loading state - Knowledge sources are now loaded only when first queried - Removed eager add_sources() calls from agent.set_knowledge() and crew.create_crew_knowledge() - Added comprehensive tests for lazy loading behavior The fix ensures that: 1. Knowledge sources don't require authentication during initialization 2. Sources are loaded on-demand when actually needed (first query) 3. Subsequent queries don't reload sources 4. Explicit add_sources() calls still work as expected Fixes #3764 Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -239,7 +239,6 @@ class Agent(BaseAgent):
|
||||
embedder=self.embedder,
|
||||
collection_name=self.role,
|
||||
)
|
||||
self.knowledge.add_sources()
|
||||
except (TypeError, ValueError) as e:
|
||||
raise ValueError(f"Invalid Knowledge Configuration: {e!s}") from e
|
||||
|
||||
|
||||
@@ -371,7 +371,6 @@ class Crew(FlowTrackable, BaseModel):
|
||||
embedder=self.embedder,
|
||||
collection_name="crew",
|
||||
)
|
||||
self.knowledge.add_sources()
|
||||
|
||||
except Exception as e:
|
||||
self._logger.log(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||
|
||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||
@@ -25,6 +25,7 @@ class Knowledge(BaseModel):
|
||||
storage: KnowledgeStorage | None = Field(default=None)
|
||||
embedder: EmbedderConfig | None = None
|
||||
collection_name: str | None = None
|
||||
_sources_loaded: bool = PrivateAttr(default=False)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -56,6 +57,10 @@ class Knowledge(BaseModel):
|
||||
if self.storage is None:
|
||||
raise ValueError("Storage is not initialized.")
|
||||
|
||||
if not self._sources_loaded:
|
||||
self.add_sources()
|
||||
self._sources_loaded = True
|
||||
|
||||
return self.storage.search(
|
||||
query,
|
||||
limit=results_limit,
|
||||
@@ -67,6 +72,7 @@ class Knowledge(BaseModel):
|
||||
for source in self.sources:
|
||||
source.storage = self.storage
|
||||
source.add()
|
||||
self._sources_loaded = True
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
137
lib/crewai/tests/knowledge/test_lazy_loading.py
Normal file
137
lib/crewai/tests/knowledge/test_lazy_loading.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Test lazy loading of knowledge sources to prevent premature authentication errors."""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai import Agent, Crew, Task
|
||||
from crewai.knowledge.knowledge import Knowledge
|
||||
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
|
||||
|
||||
|
||||
def test_knowledge_sources_not_loaded_during_initialization(tmpdir):
|
||||
"""Test that knowledge sources are not loaded during agent/crew initialization."""
|
||||
# Create a test file
|
||||
test_file = Path(tmpdir) / "test.txt"
|
||||
test_file.write_text("Test content")
|
||||
|
||||
# Create knowledge source
|
||||
knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
|
||||
|
||||
# Mock the storage to avoid actual database operations
|
||||
with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
|
||||
# Create Knowledge object
|
||||
knowledge = Knowledge(
|
||||
collection_name="test",
|
||||
sources=[knowledge_source],
|
||||
embedder=None
|
||||
)
|
||||
|
||||
# Verify that sources are not loaded yet
|
||||
assert knowledge._sources_loaded is False
|
||||
|
||||
|
||||
def test_knowledge_sources_loaded_on_first_query(tmpdir):
|
||||
"""Test that knowledge sources are loaded only when first queried."""
|
||||
# Create a test file
|
||||
test_file = Path(tmpdir) / "test.txt"
|
||||
test_file.write_text("Test content")
|
||||
|
||||
# Create knowledge source
|
||||
knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
|
||||
|
||||
# Mock the storage to avoid actual database operations
|
||||
with patch('crewai.knowledge.knowledge.KnowledgeStorage') as MockStorage:
|
||||
mock_storage = MagicMock()
|
||||
mock_storage.search.return_value = []
|
||||
MockStorage.return_value = mock_storage
|
||||
|
||||
# Create Knowledge object
|
||||
knowledge = Knowledge(
|
||||
collection_name="test",
|
||||
sources=[knowledge_source],
|
||||
embedder=None
|
||||
)
|
||||
|
||||
# Verify sources not loaded yet
|
||||
assert knowledge._sources_loaded is False
|
||||
|
||||
with patch.object(Knowledge, 'add_sources', wraps=knowledge.add_sources) as mock_add_sources:
|
||||
# Query should trigger loading
|
||||
knowledge.query(["test query"])
|
||||
|
||||
# Verify add_sources was called
|
||||
mock_add_sources.assert_called_once()
|
||||
|
||||
# Verify sources are now marked as loaded
|
||||
assert knowledge._sources_loaded is True
|
||||
|
||||
# Query again - add_sources should not be called again
|
||||
with patch.object(Knowledge, 'add_sources', wraps=knowledge.add_sources) as mock_add_sources:
|
||||
knowledge.query(["another query"])
|
||||
mock_add_sources.assert_not_called()
|
||||
|
||||
|
||||
def test_agent_with_knowledge_sources_no_immediate_loading(tmpdir):
|
||||
"""Test that creating an agent with knowledge sources doesn't immediately load them."""
|
||||
# Create a test file
|
||||
test_file = Path(tmpdir) / "test.txt"
|
||||
test_file.write_text("Test content")
|
||||
|
||||
# Create knowledge source
|
||||
knowledge_source = TextFileKnowledgeSource(file_paths=[test_file])
|
||||
|
||||
# Mock the storage to avoid authentication errors
|
||||
with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
|
||||
# Create agent with knowledge source
|
||||
agent = Agent(
|
||||
role="Test Agent",
|
||||
goal="Test goal",
|
||||
backstory="Test backstory",
|
||||
knowledge_sources=[knowledge_source],
|
||||
)
|
||||
|
||||
# Create task and crew
|
||||
task = Task(
|
||||
description="Test task",
|
||||
expected_output="Test output",
|
||||
agent=agent
|
||||
)
|
||||
|
||||
crew = Crew(
|
||||
agents=[agent],
|
||||
tasks=[task],
|
||||
)
|
||||
|
||||
# but sources should not be loaded yet
|
||||
if agent.knowledge is not None:
|
||||
assert agent.knowledge._sources_loaded is False
|
||||
|
||||
|
||||
def test_knowledge_add_sources_can_still_be_called_explicitly():
|
||||
"""Test that add_sources can still be called explicitly if needed."""
|
||||
# Create a mock knowledge source
|
||||
mock_source = MagicMock()
|
||||
mock_source.add = MagicMock()
|
||||
|
||||
# Mock the storage
|
||||
with patch('crewai.knowledge.knowledge.KnowledgeStorage') as MockStorage:
|
||||
mock_storage = MagicMock()
|
||||
MockStorage.return_value = mock_storage
|
||||
|
||||
# Create Knowledge object
|
||||
knowledge = Knowledge(
|
||||
collection_name="test",
|
||||
sources=[mock_source],
|
||||
embedder=None
|
||||
)
|
||||
|
||||
# Explicitly call add_sources
|
||||
knowledge.add_sources()
|
||||
|
||||
# Verify add was called
|
||||
mock_source.add.assert_called_once()
|
||||
|
||||
# Verify sources are marked as loaded
|
||||
assert knowledge._sources_loaded is True
|
||||
Reference in New Issue
Block a user