From 205e778d74a3401bbd4609c792d526bc17aa5659 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 16 Apr 2025 12:05:39 +0000
Subject: [PATCH] Fix: Implement add() method in BaseFileKnowledgeSource to solve PDFKnowledgeSource instantiation error

Co-Authored-By: Joe Moura
---
 .../source/base_file_knowledge_source.py   | 10 ++++++++++
 .../test_knowledge_source_instantiation.py | 16 ++++++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 tests/knowledge/test_knowledge_source_instantiation.py

diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py
index 8cee77e16..9418671cb 100644
--- a/src/crewai/knowledge/source/base_file_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_file_knowledge_source.py
@@ -64,6 +64,16 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
         """Save the documents to the storage."""
         self.storage.save(self.chunks)
 
+    def add(self) -> None:
+        """
+        Process content from files, chunk it, compute embeddings, and save them.
+        This method is called after content is loaded from files.
+        """
+        for _, text in self.content.items():
+            new_chunks = self._chunk_text(text)
+            self.chunks.extend(new_chunks)
+        self._save_documents()
+
     def convert_to_path(self, path: Union[Path, str]) -> Path:
         """Convert a path to a Path object."""
         return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
diff --git a/tests/knowledge/test_knowledge_source_instantiation.py b/tests/knowledge/test_knowledge_source_instantiation.py
new file mode 100644
index 000000000..c42543796
--- /dev/null
+++ b/tests/knowledge/test_knowledge_source_instantiation.py
@@ -0,0 +1,16 @@
+from pathlib import Path
+import pytest
+from unittest.mock import patch
+from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
+
+@patch('crewai.knowledge.source.base_file_knowledge_source.BaseFileKnowledgeSource.validate_content')
+@patch('crewai.knowledge.source.pdf_knowledge_source.PDFKnowledgeSource.load_content')
+def test_pdf_knowledge_source_instantiation(mock_load_content, mock_validate_content, tmp_path):
+    """Test that PDFKnowledgeSource can be instantiated without errors."""
+    mock_load_content.return_value = {}
+
+    pdf_path = tmp_path / "test.pdf"
+    pdf_path.touch()  # Create the file
+
+    pdf_source = PDFKnowledgeSource(file_paths=[pdf_path])
+    assert isinstance(pdf_source, PDFKnowledgeSource)