diff --git a/pyproject.toml b/pyproject.toml index bc5ef733e..9756a0d79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ openpyxl = [ mem0 = ["mem0ai>=0.1.29"] docling = [ "docling>=2.12.0", + # Required for transformers compatibility "tokenizers>=0.21,<0.22", ] diff --git a/src/crewai/knowledge/source/crew_docling_source.py b/src/crewai/knowledge/source/crew_docling_source.py index 98aedea99..6cdff5c52 100644 --- a/src/crewai/knowledge/source/crew_docling_source.py +++ b/src/crewai/knowledge/source/crew_docling_source.py @@ -15,10 +15,15 @@ from crewai.utilities.logger import Logger class CrewDoclingSource(BaseKnowledgeSource): - """Default Source class for converting documents to markdown or json - This will auto support PDF, DOCX, and TXT, XLSX, Images, and HTML files without any additional dependencies and follows the docling package as the source of truth. + """Default Source class for converting documents to Markdown or JSON. + This automatically supports PDF, DOCX, TXT, XLSX, image, and HTML files without any additional dependencies and follows the docling package as the source of truth. - Note: To use this class, install crewai with the docling extra: `pip install crewai[docling]` + Requirements: + - Install with: `pip install crewai[docling]` + - Requires tokenizers>=0.21,<0.22 for transformers compatibility + + Notes: + - docling is an optional dependency, only needed for document processing features. """ _logger: Logger = Logger(verbose=True) diff --git a/tests/test_tokenizers_compatibility.py b/tests/test_tokenizers_compatibility.py index 0d7ed6b54..edb6ab78c 100644 --- a/tests/test_tokenizers_compatibility.py +++ b/tests/test_tokenizers_compatibility.py @@ -1,4 +1,6 @@ -"""Test to verify compatibility between tokenizers and transformers.""" +"""Test suite to verify compatibility between tokenizers and transformers packages. +Ensures the installed tokenizers version meets requirements and can work effectively with transformers. 
+""" import pytest