fix: Initialize storage in StringKnowledgeSource

- Add storage initialization in model_post_init
- Add test coverage for StringKnowledgeSource
- Fixes #2150

Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
Devin AI
2025-02-17 08:16:19 +00:00
parent 1b488b6da7
commit 1d3fb97eba
2 changed files with 30 additions and 1 deletions

View File

@@ -12,8 +12,12 @@ class StringKnowledgeSource(BaseKnowledgeSource):
collection_name: Optional[str] = Field(default=None)
def model_post_init(self, _):
    """Validate the content, then make sure a storage backend is attached.

    Args:
        _: Context argument handed to the post-init hook; it is not used here.

    If ``self.storage`` is already set it is left untouched. Otherwise a
    ``KnowledgeStorage`` is created for ``self.collection_name`` and
    initialized right after it has been assigned.
    """
    self.validate_content()
    if self.storage is not None:
        return

    # Function-scope import kept from the original change.
    # NOTE(review): presumably avoids a circular import at module load - confirm.
    from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage

    self.storage = KnowledgeStorage(collection_name=self.collection_name)
    self.storage.initialize_knowledge_storage()
def validate_content(self):
"""Validate string content."""

View File

@@ -37,6 +37,28 @@ def reset_knowledge_storage(mock_vector_db):
yield
def test_string_knowledge_source(mock_vector_db):
    """Exercise StringKnowledgeSource with one short piece of text."""
    text = "Users name is John. He is 30 years old and lives in San Francisco."
    source = StringKnowledgeSource(content=text)
    mock_vector_db.sources = [source]
    mock_vector_db.query.return_value = [{"context": text, "score": 0.9}]

    # The source keeps the exact text it was constructed with.
    assert source.content == text

    # Adding the source must leave at least one chunk on it.
    source.add()
    assert len(source.chunks) > 0

    # The query goes to the mock, which hands back the canned result set above.
    question = "Where does John live?"
    hits = mock_vector_db.query(question)
    assert len(hits) > 0
    top_hit = hits[0]
    assert "San Francisco" in top_hit["context"]
    mock_vector_db.query.assert_called_once()
def test_single_short_string(mock_vector_db):
# Create a knowledge base with a single short string
content = "Brandon's favorite color is blue and he likes Mexican food."
@@ -418,6 +440,9 @@ def test_hybrid_string_and_files(mock_vector_db, tmpdir):
mock_vector_db.query.assert_called_once()
def test_pdf_knowledge_source(mock_vector_db):
# Get the directory of the current file
current_dir = Path(__file__).parent