Raise ImportError from PDFKnowledgeSource.__init__ when PDFPLUMBER_AVAILABLE is
false, and update the optional-dependency test to expect the error from the
constructor call instead of from pdf_source._import_pdfplumber().

NOTE(review): line breaks and indentation were reconstructed from the hunk
headers and Python syntax after the patch had been collapsed onto one line;
verify against the repository before applying.

diff --git a/src/crewai/knowledge/source/pdf_knowledge_source.py b/src/crewai/knowledge/source/pdf_knowledge_source.py
index a08f16e5d..672c86355 100644
--- a/src/crewai/knowledge/source/pdf_knowledge_source.py
+++ b/src/crewai/knowledge/source/pdf_knowledge_source.py
@@ -14,6 +14,15 @@ from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledge
 class PDFKnowledgeSource(BaseFileKnowledgeSource):
     """A knowledge source that stores and queries PDF file content using embeddings."""
 
+    def __init__(self, *args, **kwargs):
+        """Initialize PDFKnowledgeSource and check for pdfplumber availability."""
+        if not PDFPLUMBER_AVAILABLE:
+            raise ImportError(
+                "pdfplumber is required for PDF knowledge sources. "
+                "Please install it with: pip install 'crewai[knowledge]'"
+            )
+        super().__init__(*args, **kwargs)
+
     def load_content(self) -> Dict[Path, str]:
         """Load and preprocess PDF file content."""
         if not PDFPLUMBER_AVAILABLE:
diff --git a/tests/test_optional_dependencies.py b/tests/test_optional_dependencies.py
index ef913e1d3..aafc1fc64 100644
--- a/tests/test_optional_dependencies.py
+++ b/tests/test_optional_dependencies.py
@@ -44,10 +44,8 @@ class TestOptionalDependencies:
         test_file.touch()
 
         try:
-            pdf_source = PDFKnowledgeSource(file_paths=["test.pdf"])
-
             with pytest.raises(ImportError) as exc_info:
-                pdf_source._import_pdfplumber()
+                PDFKnowledgeSource(file_paths=["test.pdf"])
 
             assert "pdfplumber is required" in str(exc_info.value)
             assert "crewai[knowledge]" in str(exc_info.value)