diff --git a/src/crewai/knowledge/source/docling_source.py b/src/crewai/knowledge/source/docling_source.py index cc751d2f8..7257cac7f 100644 --- a/src/crewai/knowledge/source/docling_source.py +++ b/src/crewai/knowledge/source/docling_source.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Iterator, List, Union +from typing import Iterator, List, Optional, Union from urllib.parse import urlparse from docling.datamodel.base_models import InputFormat @@ -20,8 +20,8 @@ class DoclingSource(BaseKnowledgeSource): _logger: Logger = Logger(verbose=True) - file_path: List[str] = Field(default=None) - file_paths: List[str] = Field(default_factory=list) + file_path: Optional[List[Union[Path, str]]] = Field(default=None) + file_paths: List[Union[Path, str]] = Field(default_factory=list) document_converter: DocumentConverter = Field(default_factory=DocumentConverter) chunks: List[str] = Field(default_factory=list) safe_file_paths: List[Union[Path, str]] = Field(default_factory=list) diff --git a/tests/knowledge/knowledge_test.py b/tests/knowledge/knowledge_test.py index 7942b5e69..e13ebcaef 100644 --- a/tests/knowledge/knowledge_test.py +++ b/tests/knowledge/knowledge_test.py @@ -1,6 +1,7 @@ """Test Knowledge creation and querying functionality.""" from pathlib import Path +from typing import List, Union from unittest.mock import patch import pytest @@ -567,7 +568,7 @@ def test_docling_source(mock_vector_db): def test_multiple_docling_sources(): - urls = [ + urls: List[Union[Path, str]] = [ "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/", "https://lilianweng.github.io/posts/2024-07-07-hallucination/", ] @@ -580,6 +581,6 @@ def test_multiple_docling_sources(): def test_docling_source_with_local_file(): current_dir = Path(__file__).parent pdf_path = current_dir / "crewai_quickstart.pdf" - docling_source = DoclingSource(file_paths=[str(pdf_path.name)]) - assert docling_source.file_paths == [str(pdf_path.name)] + docling_source = DoclingSource(file_paths=[pdf_path]) + assert docling_source.file_paths == [pdf_path] assert docling_source.content is not None