From 0b90d52405945580c9fb3aa7c5403048a894334e Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 8 May 2025 09:18:53 +0000 Subject: [PATCH] Fix #2782: Resolve tokenizers/transformers dependency conflict by making tokenizers an optional dependency Co-Authored-By: Joe Moura --- pyproject.toml | 1 + .../knowledge/source/crew_docling_source.py | 2 ++ tests/test_tokenizers_compatibility.py | 24 +++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 tests/test_tokenizers_compatibility.py diff --git a/pyproject.toml b/pyproject.toml index 3f10c1a87..bc5ef733e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ openpyxl = [ mem0 = ["mem0ai>=0.1.29"] docling = [ "docling>=2.12.0", + "tokenizers>=0.21,<0.22", ] [tool.uv] diff --git a/src/crewai/knowledge/source/crew_docling_source.py b/src/crewai/knowledge/source/crew_docling_source.py index 8b197168b..98aedea99 100644 --- a/src/crewai/knowledge/source/crew_docling_source.py +++ b/src/crewai/knowledge/source/crew_docling_source.py @@ -17,6 +17,8 @@ from crewai.utilities.logger import Logger class CrewDoclingSource(BaseKnowledgeSource): """Default Source class for converting documents to markdown or json This will auto support PDF, DOCX, and TXT, XLSX, Images, and HTML files without any additional dependencies and follows the docling package as the source of truth. 
+
+    Note: To use this class, install crewai with the docling extra: `pip install crewai[docling]`
     """
 
     _logger: Logger = Logger(verbose=True)
diff --git a/tests/test_tokenizers_compatibility.py b/tests/test_tokenizers_compatibility.py
new file mode 100644
index 000000000..0d7ed6b54
--- /dev/null
+++ b/tests/test_tokenizers_compatibility.py
@@ -0,0 +1,24 @@
+"""Test to verify compatibility between tokenizers and transformers."""
+
+import pytest
+
+
+def test_tokenizers_transformers_compatibility():
+    """Test that the installed tokenizers version is compatible with transformers."""
+    try:
+        import tokenizers
+        import transformers
+    except ImportError:
+        pytest.skip("tokenizers or transformers not installed")
+
+    tokenizers_version = tokenizers.__version__
+    transformers_version = transformers.__version__
+    # Use major.minor only; a full split breaks on "0.21.0rc1" or "0.21.2.dev0".
+    tokenizers_major, tokenizers_minor = map(int, tokenizers_version.split(".")[:2])
+
+    assert tokenizers_major == 0, f"Expected tokenizers major version 0, got {tokenizers_major}"
+    assert tokenizers_minor >= 21, f"Expected tokenizers minor version >=21, got {tokenizers_minor}"
+    assert tokenizers_minor < 22, f"Expected tokenizers minor version <22, got {tokenizers_minor}"
+
+    print(f"Tokenizers version: {tokenizers_version}")
+    print(f"Transformers version: {transformers_version}")