Fix file_path/file_paths validation for file-based knowledge sources

This commit is contained in:
João Moura
2024-12-28 02:04:00 -03:00
parent 289b56e122
commit d993b5133c
2 changed files with 28 additions and 2 deletions

View File

@@ -26,9 +26,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
safe_file_paths: List[Path] = Field(default_factory=list)
@field_validator("file_path", "file_paths", mode="before")
@classmethod
def validate_file_path(cls, v, info):
    """Reject input where both ``file_path`` and ``file_paths`` are ``None``.

    Runs in "before" mode for each of the two fields, so ``v`` is the raw,
    not-yet-coerced value supplied for the field named by
    ``info.field_name``.

    Args:
        v: Raw value supplied for the field currently being validated.
        info: Pydantic validation info; ``info.field_name`` names the field
            being validated and ``info.data`` holds the fields that have
            already been validated.

    Returns:
        ``v`` unchanged.

    Raises:
        ValueError: If ``v`` is ``None`` and the counterpart field has
            already been validated to ``None``.
    """
    if v is not None:
        return v

    counterpart = "file_path" if info.field_name == "file_paths" else "file_paths"

    # Pydantic validates fields in declaration order and ``info.data`` only
    # contains the fields validated so far.  While the first-declared field
    # is being checked its counterpart is therefore absent from ``info.data``;
    # absence must not be treated as ``None``, otherwise an explicit ``None``
    # for the first field is rejected even though the second one is supplied.
    # The decision is deferred to the validator run that can see both values.
    if counterpart in info.data and info.data[counterpart] is None:
        raise ValueError("Either file_path or file_paths must be provided")
    return v

View File

@@ -584,3 +584,28 @@ def test_docling_source_with_local_file():
docling_source = CrewDoclingSource(file_paths=[pdf_path])
assert docling_source.file_paths == [pdf_path]
assert docling_source.content is not None
def test_file_path_validation():
    """Check which file_path/file_paths combinations PDFKnowledgeSource accepts."""
    quickstart_pdf = Path(__file__).parent / "crewai_quickstart.pdf"
    expected_paths = [quickstart_pdf]

    # Only the single-path argument is given.
    from_single = PDFKnowledgeSource(file_path=quickstart_pdf)
    assert from_single.safe_file_paths == expected_paths

    # Only the list argument is given.
    from_list = PDFKnowledgeSource(file_paths=[quickstart_pdf])
    assert from_list.safe_file_paths == expected_paths

    # Both arguments point at the same file; the resolved list still holds
    # that one path.
    from_both = PDFKnowledgeSource(
        file_path=quickstart_pdf,
        file_paths=[quickstart_pdf],
    )
    assert from_both.safe_file_paths == expected_paths

    # Constructing the source without any path must fail with this message.
    missing_paths_message = (
        "file_path/file_paths must be a Path, str, or a list of these types"
    )
    with pytest.raises(ValueError, match=missing_paths_message):
        PDFKnowledgeSource()