mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 16:18:30 +00:00
Fix issue #3764: Implement lazy loading for knowledge sources
This commit fixes a bug where knowledge sources were loaded eagerly during agent/crew initialization, causing authentication errors (401) when users had not configured proper credentials.

Changes:
- Modified the Knowledge class to use a lazy-loading pattern
- Added a _sources_loaded private attribute to track loading state
- Knowledge sources are now loaded only when first queried
- Removed the eager add_sources() calls from agent.set_knowledge() and crew.create_crew_knowledge()
- Added comprehensive tests for the lazy-loading behavior

The fix ensures that:
1. Knowledge sources don't require authentication during initialization
2. Sources are loaded on demand, when actually needed (first query)
3. Subsequent queries don't reload sources
4. Explicit add_sources() calls still work as expected

Fixes #3764

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -239,7 +239,6 @@ class Agent(BaseAgent):
|
|||||||
embedder=self.embedder,
|
embedder=self.embedder,
|
||||||
collection_name=self.role,
|
collection_name=self.role,
|
||||||
)
|
)
|
||||||
self.knowledge.add_sources()
|
|
||||||
except (TypeError, ValueError) as e:
|
except (TypeError, ValueError) as e:
|
||||||
raise ValueError(f"Invalid Knowledge Configuration: {e!s}") from e
|
raise ValueError(f"Invalid Knowledge Configuration: {e!s}") from e
|
||||||
|
|
||||||
|
|||||||
@@ -371,7 +371,6 @@ class Crew(FlowTrackable, BaseModel):
|
|||||||
embedder=self.embedder,
|
embedder=self.embedder,
|
||||||
collection_name="crew",
|
collection_name="crew",
|
||||||
)
|
)
|
||||||
self.knowledge.add_sources()
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._logger.log(
|
self._logger.log(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||||
|
|
||||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||||
@@ -25,6 +25,7 @@ class Knowledge(BaseModel):
|
|||||||
storage: KnowledgeStorage | None = Field(default=None)
|
storage: KnowledgeStorage | None = Field(default=None)
|
||||||
embedder: EmbedderConfig | None = None
|
embedder: EmbedderConfig | None = None
|
||||||
collection_name: str | None = None
|
collection_name: str | None = None
|
||||||
|
_sources_loaded: bool = PrivateAttr(default=False)
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -56,6 +57,10 @@ class Knowledge(BaseModel):
|
|||||||
if self.storage is None:
|
if self.storage is None:
|
||||||
raise ValueError("Storage is not initialized.")
|
raise ValueError("Storage is not initialized.")
|
||||||
|
|
||||||
|
if not self._sources_loaded:
|
||||||
|
self.add_sources()
|
||||||
|
self._sources_loaded = True
|
||||||
|
|
||||||
return self.storage.search(
|
return self.storage.search(
|
||||||
query,
|
query,
|
||||||
limit=results_limit,
|
limit=results_limit,
|
||||||
@@ -67,6 +72,7 @@ class Knowledge(BaseModel):
|
|||||||
for source in self.sources:
|
for source in self.sources:
|
||||||
source.storage = self.storage
|
source.storage = self.storage
|
||||||
source.add()
|
source.add()
|
||||||
|
self._sources_loaded = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|||||||
137
lib/crewai/tests/knowledge/test_lazy_loading.py
Normal file
137
lib/crewai/tests/knowledge/test_lazy_loading.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
"""Test lazy loading of knowledge sources to prevent premature authentication errors."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from crewai import Agent, Crew, Task
|
||||||
|
from crewai.knowledge.knowledge import Knowledge
|
||||||
|
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_sources_not_loaded_during_initialization(tmpdir):
    """Constructing a ``Knowledge`` object must leave its sources unloaded.

    The storage class is patched out so that construction performs no real
    database work; the test then checks that the lazy-loading flag is still
    unset.
    """
    source_path = Path(tmpdir) / "test.txt"
    source_path.write_text("Test content")
    file_source = TextFileKnowledgeSource(file_paths=[source_path])

    # Swap in a mock storage class for the duration of the construction.
    with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
        kb = Knowledge(
            collection_name="test",
            sources=[file_source],
            embedder=None,
        )

        # Nothing has been queried, so loading must still be pending.
        assert kb._sources_loaded is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_sources_loaded_on_first_query(tmpdir):
    """The first ``query`` loads the sources; later queries do not reload.

    ``add_sources`` is wrapped by a spy on the ``Knowledge`` class so the real
    method still runs while its call count can be inspected.
    """
    source_path = Path(tmpdir) / "test.txt"
    source_path.write_text("Test content")
    file_source = TextFileKnowledgeSource(file_paths=[source_path])

    # Patch the storage class so every storage call lands on a mock.
    with patch('crewai.knowledge.knowledge.KnowledgeStorage') as storage_cls:
        fake_storage = MagicMock()
        fake_storage.search.return_value = []
        storage_cls.return_value = fake_storage

        kb = Knowledge(
            collection_name="test",
            sources=[file_source],
            embedder=None,
        )

        # Construction alone must not have loaded anything.
        assert kb._sources_loaded is False

        # First query: the spy must see exactly one add_sources call.
        with patch.object(
            Knowledge, 'add_sources', wraps=kb.add_sources
        ) as first_spy:
            kb.query(["test query"])
            first_spy.assert_called_once()

        # The flag flips once the sources have been added.
        assert kb._sources_loaded is True

        # Second query: the sources are already loaded, so no further call.
        with patch.object(
            Knowledge, 'add_sources', wraps=kb.add_sources
        ) as second_spy:
            kb.query(["another query"])
            second_spy.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
def test_agent_with_knowledge_sources_no_immediate_loading(tmpdir):
    """Building an agent, task and crew must not load the knowledge sources.

    The storage class is patched out so that no real backend is contacted
    while the objects are constructed.
    """
    source_path = Path(tmpdir) / "test.txt"
    source_path.write_text("Test content")
    file_source = TextFileKnowledgeSource(file_paths=[source_path])

    # Patch the storage class so construction cannot hit a real backend.
    with patch('crewai.knowledge.knowledge.KnowledgeStorage'):
        agent = Agent(
            role="Test Agent",
            goal="Test goal",
            backstory="Test backstory",
            knowledge_sources=[file_source],
        )

        task = Task(
            description="Test task",
            expected_output="Test output",
            agent=agent,
        )

        # Built only to exercise crew initialization; never kicked off.
        crew = Crew(
            agents=[agent],
            tasks=[task],
        )

        # NOTE(review): the assertion is skipped whenever agent.knowledge is
        # None. Confirm whether the agent's knowledge is populated at this
        # point; if it is not, this check never runs.
        agent_kb = agent.knowledge
        if agent_kb is not None:
            assert agent_kb._sources_loaded is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_add_sources_can_still_be_called_explicitly():
    """An explicit ``add_sources()`` call loads the sources and sets the flag.

    Both the source and the storage are mocks, so the test observes only the
    calls made and the resulting ``_sources_loaded`` state.
    """
    fake_source = MagicMock()
    fake_source.add = MagicMock()

    # Patch the storage class so the Knowledge object gets a mock storage.
    with patch('crewai.knowledge.knowledge.KnowledgeStorage') as storage_cls:
        fake_storage = MagicMock()
        storage_cls.return_value = fake_storage

        kb = Knowledge(
            collection_name="test",
            sources=[fake_source],
            embedder=None,
        )

        # Load eagerly, without going through query().
        kb.add_sources()

        # The single source must have been added exactly once...
        fake_source.add.assert_called_once()

        # ...and the object must now report its sources as loaded.
        assert kb._sources_loaded is True
|
||||||
Reference in New Issue
Block a user