Fix test and types: allow Path or str entries in DoclingSource file paths

This commit is contained in:
Lorenze Jay
2024-12-16 21:52:36 -08:00
parent 0921f71fd2
commit e14a49f82c
2 changed files with 7 additions and 6 deletions

View File

@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Iterator, List, Union
from typing import Iterator, List, Optional, Union
from urllib.parse import urlparse
from docling.datamodel.base_models import InputFormat
@@ -20,8 +20,8 @@ class DoclingSource(BaseKnowledgeSource):
_logger: Logger = Logger(verbose=True)
file_path: List[str] = Field(default=None)
file_paths: List[str] = Field(default_factory=list)
file_path: Optional[List[Union[Path, str]]] = Field(default=None)
file_paths: List[Union[Path, str]] = Field(default_factory=list)
document_converter: DocumentConverter = Field(default_factory=DocumentConverter)
chunks: List[str] = Field(default_factory=list)
safe_file_paths: List[Union[Path, str]] = Field(default_factory=list)

View File

@@ -1,6 +1,7 @@
"""Test Knowledge creation and querying functionality."""
from pathlib import Path
from typing import List, Union
from unittest.mock import patch
import pytest
@@ -567,7 +568,7 @@ def test_docling_source(mock_vector_db):
def test_multiple_docling_sources():
urls = [
urls: List[Union[Path, str]] = [
"https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
"https://lilianweng.github.io/posts/2024-07-07-hallucination/",
]
@@ -580,6 +581,6 @@ def test_multiple_docling_sources():
def test_docling_source_with_local_file():
current_dir = Path(__file__).parent
pdf_path = current_dir / "crewai_quickstart.pdf"
docling_source = DoclingSource(file_paths=[str(pdf_path.name)])
assert docling_source.file_paths == [str(pdf_path.name)]
docling_source = DoclingSource(file_paths=[pdf_path])
assert docling_source.file_paths == [pdf_path]
assert docling_source.content is not None