preparing new version

This commit is contained in:
João Moura
2025-01-03 12:42:47 -03:00
parent bfe2c44f55
commit d1e2430aac
6 changed files with 383 additions and 180 deletions

View File

@@ -2,11 +2,16 @@ from pathlib import Path
from typing import Iterator, List, Optional, Union
from urllib.parse import urlparse
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter
from docling.exceptions import ConversionError
from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
from docling_core.types.doc.document import DoclingDocument
try:
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter
from docling.exceptions import ConversionError
from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
from docling_core.types.doc.document import DoclingDocument
DOCLING_AVAILABLE = True
except ImportError:
DOCLING_AVAILABLE = False
from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
@@ -19,6 +24,14 @@ class CrewDoclingSource(BaseKnowledgeSource):
This will auto support PDF, DOCX, and TXT, XLSX, Images, and HTML files without any additional dependencies and follows the docling package as the source of truth.
"""
def __init__(self, *args, **kwargs):
if not DOCLING_AVAILABLE:
raise ImportError(
"The docling package is required to use CrewDoclingSource. "
"Please install it using: uv add docling"
)
super().__init__(*args, **kwargs)
_logger: Logger = Logger(verbose=True)
file_path: Optional[List[Union[Path, str]]] = Field(default=None)