From 67772ca591be700898c3c1ba2f93b592c651a033 Mon Sep 17 00:00:00 2001 From: Brandon Hancock Date: Thu, 16 Jan 2025 11:29:26 -0500 Subject: [PATCH] Fix docling issues --- src/crewai/knowledge/source/crew_docling_source.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/crewai/knowledge/source/crew_docling_source.py b/src/crewai/knowledge/source/crew_docling_source.py index bbfcf9b92..6ca0ae967 100644 --- a/src/crewai/knowledge/source/crew_docling_source.py +++ b/src/crewai/knowledge/source/crew_docling_source.py @@ -8,6 +8,7 @@ try: from docling.exceptions import ConversionError from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker from docling_core.types.doc.document import DoclingDocument + DOCLING_AVAILABLE = True except ImportError: DOCLING_AVAILABLE = False @@ -38,8 +39,8 @@ class CrewDoclingSource(BaseKnowledgeSource): file_paths: List[Union[Path, str]] = Field(default_factory=list) chunks: List[str] = Field(default_factory=list) safe_file_paths: List[Union[Path, str]] = Field(default_factory=list) - content: List[DoclingDocument] = Field(default_factory=list) - document_converter: DocumentConverter = Field( + content: List["DoclingDocument"] = Field(default_factory=list) + document_converter: "DocumentConverter" = Field( default_factory=lambda: DocumentConverter( allowed_formats=[ InputFormat.MD, @@ -65,7 +66,7 @@ class CrewDoclingSource(BaseKnowledgeSource): self.safe_file_paths = self.validate_content() self.content = self._load_content() - def _load_content(self) -> List[DoclingDocument]: + def _load_content(self) -> List["DoclingDocument"]: try: return self._convert_source_to_docling_documents() except ConversionError as e: @@ -87,11 +88,11 @@ class CrewDoclingSource(BaseKnowledgeSource): self.chunks.extend(list(new_chunks_iterable)) self._save_documents() - def _convert_source_to_docling_documents(self) -> List[DoclingDocument]: + def _convert_source_to_docling_documents(self) -> List["DoclingDocument"]: conv_results_iter = self.document_converter.convert_all(self.safe_file_paths) return [result.document for result in conv_results_iter] - def _chunk_doc(self, doc: DoclingDocument) -> Iterator[str]: + def _chunk_doc(self, doc: "DoclingDocument") -> Iterator[str]: chunker = HierarchicalChunker() for chunk in chunker.chunk(doc): yield chunk.text