From d993b5133c18f7a4015418553f2d10bd15f20bb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Sat, 28 Dec 2024 02:04:00 -0300 Subject: [PATCH] fixing file paths for knowledge source --- .../source/base_file_knowledge_source.py | 5 ++-- tests/knowledge/knowledge_test.py | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py index 5743b1704..ac345b6a6 100644 --- a/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -26,9 +26,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): safe_file_paths: List[Path] = Field(default_factory=list) @field_validator("file_path", "file_paths", mode="before") - def validate_file_path(cls, v, values): + def validate_file_path(cls, v, info): """Validate that at least one of file_path or file_paths is provided.""" - if v is None and ("file_path" not in values or values.get("file_path") is None): + # Reject only when this field is None and the sibling field is also missing from info.data (which holds only the fields validated so far) + if v is None and info.data.get("file_path" if info.field_name == "file_paths" else "file_paths") is None: raise ValueError("Either file_path or file_paths must be provided") return v diff --git a/tests/knowledge/knowledge_test.py b/tests/knowledge/knowledge_test.py index 366067587..6704d3031 100644 --- a/tests/knowledge/knowledge_test.py +++ b/tests/knowledge/knowledge_test.py @@ -584,3 +584,28 @@ def test_docling_source_with_local_file(): docling_source = CrewDoclingSource(file_paths=[pdf_path]) assert docling_source.file_paths == [pdf_path] assert docling_source.content is not None + + +def test_file_path_validation(): + """Test file path validation for knowledge sources.""" + current_dir = Path(__file__).parent + pdf_path = current_dir / "crewai_quickstart.pdf" + + # Test valid single file_path + source = 
PDFKnowledgeSource(file_path=pdf_path) + assert source.safe_file_paths == [pdf_path] + + # Test valid file_paths list + source = PDFKnowledgeSource(file_paths=[pdf_path]) + assert source.safe_file_paths == [pdf_path] + + # Test both file_path and file_paths provided (should use file_paths) + source = PDFKnowledgeSource(file_path=pdf_path, file_paths=[pdf_path]) + assert source.safe_file_paths == [pdf_path] + + # Test neither file_path nor file_paths provided + with pytest.raises( + ValueError, + match="file_path/file_paths must be a Path, str, or a list of these types" + ): + PDFKnowledgeSource()