diff --git a/src/crewai/knowledge/source/docling_source.py b/src/crewai/knowledge/source/docling_source.py index 9741e397d..22a598982 100644 --- a/src/crewai/knowledge/source/docling_source.py +++ b/src/crewai/knowledge/source/docling_source.py @@ -25,7 +25,7 @@ class DoclingSource(BaseKnowledgeSource): file_paths: List[str] = Field(default_factory=list) document_converter: DocumentConverter = Field(default_factory=DocumentConverter) chunks: List[str] = Field(default_factory=list) - safe_file_paths: List[str] = Field(default_factory=list) + safe_file_paths: List[Union[Path, str]] = Field(default_factory=list) content: List[DoclingDocument] = Field(default_factory=list) def model_post_init(self, _) -> None: @@ -75,8 +75,8 @@ class DoclingSource(BaseKnowledgeSource): for chunk in chunker.chunk(doc): yield chunk.text - def _process_file_paths(self) -> List[str]: - processed_paths = [] + def _process_file_paths(self) -> List[Union[Path, str]]: + processed_paths: List[Union[Path, str]] = [] for path in self.file_paths: if isinstance(path, str): if path.startswith(("http://", "https://")): @@ -90,7 +90,7 @@ class DoclingSource(BaseKnowledgeSource): else: local_path = Path(KNOWLEDGE_DIRECTORY + "/" + path) if local_path.exists(): - processed_paths.append(local_path.name) + processed_paths.append(local_path) else: raise FileNotFoundError(f"File not found: {local_path}") else: diff --git a/tests/knowledge/knowledge_test.py b/tests/knowledge/knowledge_test.py index 2602a5e3c..7942b5e69 100644 --- a/tests/knowledge/knowledge_test.py +++ b/tests/knowledge/knowledge_test.py @@ -575,3 +575,11 @@ def test_multiple_docling_sources(): assert docling_source.file_paths == urls assert docling_source.content is not None + + +def test_docling_source_with_local_file(): + current_dir = Path(__file__).parent + pdf_path = current_dir / "crewai_quickstart.pdf" + docling_source = DoclingSource(file_paths=[str(pdf_path.name)]) + assert docling_source.file_paths == [str(pdf_path.name)] + assert docling_source.content is not None