diff --git a/lib/crewai-tools/pyproject.toml b/lib/crewai-tools/pyproject.toml index 672b604c2..dbcaeb322 100644 --- a/lib/crewai-tools/pyproject.toml +++ b/lib/crewai-tools/pyproject.toml @@ -16,9 +16,9 @@ dependencies = [ "lancedb>=0.5.4", "tiktoken>=0.8.0", "beautifulsoup4>=4.13.4", - "pypdf>=5.9.0", "python-docx>=1.2.0", "youtube-transcript-api>=1.2.2", + "pymupdf>=1.26.6", ] diff --git a/lib/crewai-tools/src/crewai_tools/adapters/crewai_rag_adapter.py b/lib/crewai-tools/src/crewai_tools/adapters/crewai_rag_adapter.py index fb0a22791..b89212de2 100644 --- a/lib/crewai-tools/src/crewai_tools/adapters/crewai_rag_adapter.py +++ b/lib/crewai-tools/src/crewai_tools/adapters/crewai_rag_adapter.py @@ -3,8 +3,7 @@ from __future__ import annotations import hashlib -from pathlib import Path -from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast +from typing import TYPE_CHECKING, Any, cast import uuid from crewai.rag.config.types import RagConfigType @@ -19,15 +18,13 @@ from typing_extensions import TypeIs, Unpack from crewai_tools.rag.data_types import DataType from crewai_tools.rag.misc import sanitize_metadata_for_chromadb from crewai_tools.tools.rag.rag_tool import Adapter +from crewai_tools.tools.rag.types import AddDocumentParams, ContentItem if TYPE_CHECKING: from crewai.rag.qdrant.config import QdrantConfig -ContentItem: TypeAlias = str | Path | dict[str, Any] - - def _is_qdrant_config(config: Any) -> TypeIs[QdrantConfig]: """Check if config is a QdrantConfig using safe duck typing. @@ -46,19 +43,6 @@ def _is_qdrant_config(config: Any) -> TypeIs[QdrantConfig]: return False -class AddDocumentParams(TypedDict, total=False): - """Parameters for adding documents to the RAG system.""" - - data_type: DataType - metadata: dict[str, Any] - website: str - url: str - file_path: str | Path - github_url: str - youtube_url: str - directory_path: str | Path - - class CrewAIRagAdapter(Adapter): """Adapter that uses CrewAI's native RAG system. @@ -131,13 +115,26 @@ class CrewAIRagAdapter(Adapter): def add(self, *args: ContentItem, **kwargs: Unpack[AddDocumentParams]) -> None: """Add content to the knowledge base. - This method handles various input types and converts them to documents - for the vector database. It supports the data_type parameter for - compatibility with existing tools. - Args: *args: Content items to add (strings, paths, or document dicts) - **kwargs: Additional parameters including data_type, metadata, etc. + **kwargs: Additional parameters including: + - data_type: DataType enum or string (e.g., "file", "pdf_file", "text") + - path: Path to file or directory (alternative to positional arg) + - file_path: Alias for path + - metadata: Additional metadata to attach to documents + - url: URL to fetch content from + - website: Website URL to scrape + - github_url: GitHub repository URL + - youtube_url: YouTube video URL + - directory_path: Path to directory + + Examples: + rag_tool.add("path/to/document.pdf", data_type=DataType.PDF_FILE) + + rag_tool.add(path="path/to/document.pdf", data_type="file") + rag_tool.add(file_path="path/to/document.pdf", data_type="pdf_file") + + rag_tool.add("path/to/document.pdf") # auto-detects PDF """ import os @@ -146,10 +143,54 @@ class CrewAIRagAdapter(Adapter): from crewai_tools.rag.source_content import SourceContent documents: list[BaseRecord] = [] - data_type: DataType | None = kwargs.get("data_type") + raw_data_type = kwargs.get("data_type") base_metadata: dict[str, Any] = kwargs.get("metadata", {}) - for arg in args: + data_type: DataType | None = None + if raw_data_type is not None: + if isinstance(raw_data_type, DataType): + if raw_data_type != DataType.FILE: + data_type = raw_data_type + elif isinstance(raw_data_type, str): + if raw_data_type != "file": + try: + data_type = DataType(raw_data_type) + except ValueError: + raise ValueError( + f"Invalid data_type: '{raw_data_type}'. " + f"Valid values are: 'file' (auto-detect), or one of: " + f"{', '.join(dt.value for dt in DataType)}" + ) from None + + content_items: list[ContentItem] = list(args) + + path_value = kwargs.get("path") or kwargs.get("file_path") + if path_value is not None: + content_items.append(path_value) + + if url := kwargs.get("url"): + content_items.append(url) + if website := kwargs.get("website"): + content_items.append(website) + if github_url := kwargs.get("github_url"): + content_items.append(github_url) + if youtube_url := kwargs.get("youtube_url"): + content_items.append(youtube_url) + if directory_path := kwargs.get("directory_path"): + content_items.append(directory_path) + + file_extensions = { + ".pdf", + ".txt", + ".csv", + ".json", + ".xml", + ".docx", + ".mdx", + ".md", + } + + for arg in content_items: source_ref: str if isinstance(arg, dict): source_ref = str(arg.get("source", arg.get("content", ""))) @@ -157,6 +198,14 @@ class CrewAIRagAdapter(Adapter): source_ref = str(arg) if not data_type: + ext = os.path.splitext(source_ref)[1].lower() + is_url = source_ref.startswith(("http://", "https://", "file://")) + if ( + ext in file_extensions + and not is_url + and not os.path.isfile(source_ref) + ): + raise FileNotFoundError(f"File does not exist: {source_ref}") data_type = DataTypes.from_content(source_ref) if data_type == DataType.DIRECTORY: diff --git a/lib/crewai-tools/src/crewai_tools/rag/data_types.py b/lib/crewai-tools/src/crewai_tools/rag/data_types.py index 3e9cf724b..09d519ce9 100644 --- a/lib/crewai-tools/src/crewai_tools/rag/data_types.py +++ b/lib/crewai-tools/src/crewai_tools/rag/data_types.py @@ -1,6 +1,8 @@ from enum import Enum +from importlib import import_module import os from pathlib import Path +from typing import cast from urllib.parse import urlparse from crewai_tools.rag.base_loader import BaseLoader @@ -8,6 +10,7 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker class DataType(str, Enum): + FILE = "file" PDF_FILE = "pdf_file" TEXT_FILE = "text_file" CSV = "csv" @@ -15,22 +18,14 @@ class DataType(str, Enum): XML = "xml" DOCX = "docx" MDX = "mdx" - - # Database types MYSQL = "mysql" POSTGRES = "postgres" - - # Repository types GITHUB = "github" DIRECTORY = "directory" - - # Web types WEBSITE = "website" DOCS_SITE = "docs_site" YOUTUBE_VIDEO = "youtube_video" YOUTUBE_CHANNEL = "youtube_channel" - - # Raw types TEXT = "text" def get_chunker(self) -> BaseChunker: @@ -63,13 +58,11 @@ class DataType(str, Enum): try: module = import_module(module_path) - return getattr(module, class_name)() + return cast(BaseChunker, getattr(module, class_name)()) except Exception as e: raise ValueError(f"Error loading chunker for {self}: {e}") from e def get_loader(self) -> BaseLoader: - from importlib import import_module - loaders = { DataType.PDF_FILE: ("pdf_loader", "PDFLoader"), DataType.TEXT_FILE: ("text_loader", "TextFileLoader"), @@ -98,7 +91,7 @@ class DataType(str, Enum): module_path = f"crewai_tools.rag.loaders.{module_name}" try: module = import_module(module_path) - return getattr(module, class_name)() + return cast(BaseLoader, getattr(module, class_name)()) except Exception as e: raise ValueError(f"Error loading loader for {self}: {e}") from e diff --git a/lib/crewai-tools/src/crewai_tools/rag/loaders/pdf_loader.py b/lib/crewai-tools/src/crewai_tools/rag/loaders/pdf_loader.py index 7e7f0f8e3..743e30785 100644 --- a/lib/crewai-tools/src/crewai_tools/rag/loaders/pdf_loader.py +++ b/lib/crewai-tools/src/crewai_tools/rag/loaders/pdf_loader.py @@ -2,70 +2,112 @@ import os from pathlib import Path -from typing import Any +from typing import Any, cast +from urllib.parse import urlparse +import urllib.request from crewai_tools.rag.base_loader import BaseLoader, LoaderResult from crewai_tools.rag.source_content import SourceContent class PDFLoader(BaseLoader): - """Loader for PDF files.""" + """Loader for PDF files and URLs.""" - def load(self, source: SourceContent, **kwargs) -> LoaderResult: # type: ignore[override] - """Load and extract text from a PDF file. + @staticmethod + def _is_url(path: str) -> bool: + """Check if the path is a URL.""" + try: + parsed = urlparse(path) + return parsed.scheme in ("http", "https") + except Exception: + return False + + @staticmethod + def _download_pdf(url: str) -> bytes: + """Download PDF content from a URL. Args: - source: The source content containing the PDF file path + url: The URL to download from. Returns: - LoaderResult with extracted text content + The PDF content as bytes. Raises: - FileNotFoundError: If the PDF file doesn't exist - ImportError: If required PDF libraries aren't installed + ValueError: If the download fails. + """ + + try: + with urllib.request.urlopen(url, timeout=30) as response: # noqa: S310 + return cast(bytes, response.read()) + except Exception as e: + raise ValueError(f"Failed to download PDF from {url}: {e!s}") from e + + def load(self, source: SourceContent, **kwargs: Any) -> LoaderResult: # type: ignore[override] + """Load and extract text from a PDF file or URL. + + Args: + source: The source content containing the PDF file path or URL. + + Returns: + LoaderResult with extracted text content. + + Raises: + FileNotFoundError: If the PDF file doesn't exist. + ImportError: If required PDF libraries aren't installed. + ValueError: If the PDF cannot be read or downloaded. """ try: - import pypdf - except ImportError: - try: - import PyPDF2 as pypdf # type: ignore[import-not-found,no-redef] # noqa: N813 - except ImportError as e: - raise ImportError( - "PDF support requires pypdf or PyPDF2. Install with: uv add pypdf" - ) from e + import pymupdf # type: ignore[import-untyped] + except ImportError as e: + raise ImportError( + "PDF support requires pymupdf. Install with: uv add pymupdf" + ) from e file_path = source.source + is_url = self._is_url(file_path) - if not os.path.isfile(file_path): - raise FileNotFoundError(f"PDF file not found: {file_path}") + if is_url: + source_name = Path(urlparse(file_path).path).name or "downloaded.pdf" + else: + source_name = Path(file_path).name - text_content = [] + text_content: list[str] = [] metadata: dict[str, Any] = { - "source": str(file_path), - "file_name": Path(file_path).name, + "source": file_path, + "file_name": source_name, "file_type": "pdf", } try: - with open(file_path, "rb") as file: - pdf_reader = pypdf.PdfReader(file) - metadata["num_pages"] = len(pdf_reader.pages) + if is_url: + pdf_bytes = self._download_pdf(file_path) + doc = pymupdf.open(stream=pdf_bytes, filetype="pdf") + else: + if not os.path.isfile(file_path): + raise FileNotFoundError(f"PDF file not found: {file_path}") + doc = pymupdf.open(file_path) - for page_num, page in enumerate(pdf_reader.pages, 1): - page_text = page.extract_text() - if page_text.strip(): - text_content.append(f"Page {page_num}:\n{page_text}") + metadata["num_pages"] = len(doc) + + for page_num, page in enumerate(doc, 1): + page_text = page.get_text() + if page_text.strip(): + text_content.append(f"Page {page_num}:\n{page_text}") + + doc.close() + except FileNotFoundError: + raise except Exception as e: - raise ValueError(f"Error reading PDF file {file_path}: {e!s}") from e + raise ValueError(f"Error reading PDF from {file_path}: {e!s}") from e if not text_content: - content = f"[PDF file with no extractable text: {Path(file_path).name}]" + content = f"[PDF file with no extractable text: {source_name}]" else: content = "\n\n".join(text_content) return LoaderResult( content=content, - source=str(file_path), + source=file_path, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=str(file_path), content=content), + doc_id=self.generate_doc_id(source_ref=file_path, content=content), ) diff --git a/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py b/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py index 549a01062..52fc903e9 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py @@ -14,9 +14,14 @@ from pydantic import ( field_validator, model_validator, ) -from typing_extensions import Self +from typing_extensions import Self, Unpack -from crewai_tools.tools.rag.types import RagToolConfig, VectorDbConfig +from crewai_tools.tools.rag.types import ( + AddDocumentParams, + ContentItem, + RagToolConfig, + VectorDbConfig, +) def _validate_embedding_config( @@ -72,6 +77,8 @@ def _validate_embedding_config( class Adapter(BaseModel, ABC): + """Abstract base class for RAG adapters.""" + model_config = ConfigDict(arbitrary_types_allowed=True) @abstractmethod @@ -86,8 +93,8 @@ class Adapter(BaseModel, ABC): @abstractmethod def add( self, - *args: Any, - **kwargs: Any, + *args: ContentItem, + **kwargs: Unpack[AddDocumentParams], ) -> None: """Add content to the knowledge base.""" @@ -102,7 +109,11 @@ class RagTool(BaseTool): ) -> str: raise NotImplementedError - def add(self, *args: Any, **kwargs: Any) -> None: + def add( + self, + *args: ContentItem, + **kwargs: Unpack[AddDocumentParams], + ) -> None: raise NotImplementedError name: str = "Knowledge base" @@ -207,9 +218,34 @@ class RagTool(BaseTool): def add( self, - *args: Any, - **kwargs: Any, + *args: ContentItem, + **kwargs: Unpack[AddDocumentParams], ) -> None: + """Add content to the knowledge base. + + + Args: + *args: Content items to add (strings, paths, or document dicts) + data_type: DataType enum or string (e.g., "file", "pdf_file", "text") + path: Path to file or directory, alias to positional arg + file_path: Alias for path + metadata: Additional metadata to attach to documents + url: URL to fetch content from + website: Website URL to scrape + github_url: GitHub repository URL + youtube_url: YouTube video URL + directory_path: Path to directory + + Examples: + rag_tool.add("path/to/document.pdf", data_type=DataType.PDF_FILE) + + # Keyword argument (documented API) + rag_tool.add(path="path/to/document.pdf", data_type="file") + rag_tool.add(file_path="path/to/document.pdf", data_type="pdf_file") + + # Auto-detect type from extension + rag_tool.add("path/to/document.pdf") # auto-detects PDF + """ self.adapter.add(*args, **kwargs) def _run( diff --git a/lib/crewai-tools/src/crewai_tools/tools/rag/types.py b/lib/crewai-tools/src/crewai_tools/tools/rag/types.py index 1077c7b9b..606f86401 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/rag/types.py +++ b/lib/crewai-tools/src/crewai_tools/tools/rag/types.py @@ -1,10 +1,50 @@ """Type definitions for RAG tool configuration.""" -from typing import Any, Literal +from pathlib import Path +from typing import Any, Literal, TypeAlias from crewai.rag.embeddings.types import ProviderSpec from typing_extensions import TypedDict +from crewai_tools.rag.data_types import DataType + + +DataTypeStr: TypeAlias = Literal[ + "file", + "pdf_file", + "text_file", + "csv", + "json", + "xml", + "docx", + "mdx", + "mysql", + "postgres", + "github", + "directory", + "website", + "docs_site", + "youtube_video", + "youtube_channel", + "text", +] + +ContentItem: TypeAlias = str | Path | dict[str, Any] + + +class AddDocumentParams(TypedDict, total=False): + """Parameters for adding documents to the RAG system.""" + + data_type: DataType | DataTypeStr + metadata: dict[str, Any] + path: str | Path + file_path: str | Path + website: str + url: str + github_url: str + youtube_url: str + directory_path: str | Path + class VectorDbConfig(TypedDict): """Configuration for vector database provider. diff --git a/lib/crewai-tools/tests/tools/rag/test_rag_tool_add_data_type.py b/lib/crewai-tools/tests/tools/rag/test_rag_tool_add_data_type.py new file mode 100644 index 000000000..853e6ab00 --- /dev/null +++ b/lib/crewai-tools/tests/tools/rag/test_rag_tool_add_data_type.py @@ -0,0 +1,471 @@ +"""Tests for RagTool.add() method with various data_type values.""" + +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from crewai_tools.rag.data_types import DataType +from crewai_tools.tools.rag.rag_tool import RagTool + + +@pytest.fixture +def mock_rag_client() -> MagicMock: + """Create a mock RAG client for testing.""" + mock_client = MagicMock() + mock_client.get_or_create_collection = MagicMock(return_value=None) + mock_client.add_documents = MagicMock(return_value=None) + mock_client.search = MagicMock(return_value=[]) + return mock_client + + +@pytest.fixture +def rag_tool(mock_rag_client: MagicMock) -> RagTool: + """Create a RagTool instance with mocked client.""" + with ( + patch( + "crewai_tools.adapters.crewai_rag_adapter.get_rag_client", + return_value=mock_rag_client, + ), + patch( + "crewai_tools.adapters.crewai_rag_adapter.create_client", + return_value=mock_rag_client, + ), + ): + return RagTool() + + +class TestDataTypeFileAlias: + """Tests for data_type='file' alias.""" + + def test_file_alias_with_existing_file( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test that data_type='file' works with existing files.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("Test content for file alias.") + + rag_tool.add(path=str(test_file), data_type="file") + + assert mock_rag_client.add_documents.called + + def test_file_alias_with_nonexistent_file_raises_error( + self, rag_tool: RagTool + ) -> None: + """Test that data_type='file' raises FileNotFoundError for missing files.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent/path/to/file.pdf", data_type="file") + + def test_file_alias_with_path_keyword( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test that path keyword argument works with data_type='file'.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "document.txt" + test_file.write_text("Content via path keyword.") + + rag_tool.add(data_type="file", path=str(test_file)) + + assert mock_rag_client.add_documents.called + + def test_file_alias_with_file_path_keyword( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test that file_path keyword argument works with data_type='file'.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "document.txt" + test_file.write_text("Content via file_path keyword.") + + rag_tool.add(data_type="file", file_path=str(test_file)) + + assert mock_rag_client.add_documents.called + + +class TestDataTypeStringValues: + """Tests for data_type as string values matching DataType enum.""" + + def test_pdf_file_string( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type='pdf_file' with existing PDF file.""" + with TemporaryDirectory() as tmpdir: + # Create a minimal valid PDF file + test_file = Path(tmpdir) / "test.pdf" + test_file.write_bytes( + b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\ntrailer\n" + b"<<\n/Root 1 0 R\n>>\n%%EOF" + ) + + # Mock the PDF loader to avoid actual PDF parsing + with patch( + "crewai_tools.adapters.crewai_rag_adapter.DataType.get_loader" + ) as mock_loader: + mock_loader_instance = MagicMock() + mock_loader_instance.load.return_value = MagicMock( + content="PDF content", metadata={}, doc_id="test-id" + ) + mock_loader.return_value = mock_loader_instance + + rag_tool.add(path=str(test_file), data_type="pdf_file") + + assert mock_rag_client.add_documents.called + + def test_text_file_string( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type='text_file' with existing text file.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("Plain text content.") + + rag_tool.add(path=str(test_file), data_type="text_file") + + assert mock_rag_client.add_documents.called + + def test_csv_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None: + """Test data_type='csv' with existing CSV file.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.csv" + test_file.write_text("name,value\nfoo,1\nbar,2") + + rag_tool.add(path=str(test_file), data_type="csv") + + assert mock_rag_client.add_documents.called + + def test_json_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None: + """Test data_type='json' with existing JSON file.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.json" + test_file.write_text('{"key": "value", "items": [1, 2, 3]}') + + rag_tool.add(path=str(test_file), data_type="json") + + assert mock_rag_client.add_documents.called + + def test_xml_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None: + """Test data_type='xml' with existing XML file.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.xml" + test_file.write_text('value') + + rag_tool.add(path=str(test_file), data_type="xml") + + assert mock_rag_client.add_documents.called + + def test_mdx_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None: + """Test data_type='mdx' with existing MDX file.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.mdx" + test_file.write_text("# Heading\n\nSome markdown content.") + + rag_tool.add(path=str(test_file), data_type="mdx") + + assert mock_rag_client.add_documents.called + + def test_text_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None: + """Test data_type='text' with raw text content.""" + rag_tool.add("This is raw text content.", data_type="text") + + assert mock_rag_client.add_documents.called + + def test_directory_string( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type='directory' with existing directory.""" + with TemporaryDirectory() as tmpdir: + # Create some files in the directory + (Path(tmpdir) / "file1.txt").write_text("Content 1") + (Path(tmpdir) / "file2.txt").write_text("Content 2") + + rag_tool.add(path=tmpdir, data_type="directory") + + assert mock_rag_client.add_documents.called + + +class TestDataTypeEnumValues: + """Tests for data_type as DataType enum values.""" + + def test_datatype_file_enum_with_existing_file( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type=DataType.FILE with existing file (auto-detect).""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("File enum auto-detect content.") + + rag_tool.add(str(test_file), data_type=DataType.FILE) + + assert mock_rag_client.add_documents.called + + def test_datatype_file_enum_with_nonexistent_file_raises_error( + self, rag_tool: RagTool + ) -> None: + """Test data_type=DataType.FILE raises FileNotFoundError for missing files.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add("nonexistent/file.pdf", data_type=DataType.FILE) + + def test_datatype_pdf_file_enum( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type=DataType.PDF_FILE with existing file.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.pdf" + test_file.write_bytes( + b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\ntrailer\n" + b"<<\n/Root 1 0 R\n>>\n%%EOF" + ) + + with patch( + "crewai_tools.adapters.crewai_rag_adapter.DataType.get_loader" + ) as mock_loader: + mock_loader_instance = MagicMock() + mock_loader_instance.load.return_value = MagicMock( + content="PDF content", metadata={}, doc_id="test-id" + ) + mock_loader.return_value = mock_loader_instance + + rag_tool.add(str(test_file), data_type=DataType.PDF_FILE) + + assert mock_rag_client.add_documents.called + + def test_datatype_text_file_enum( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type=DataType.TEXT_FILE with existing file.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("Text file content.") + + rag_tool.add(str(test_file), data_type=DataType.TEXT_FILE) + + assert mock_rag_client.add_documents.called + + def test_datatype_text_enum( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type=DataType.TEXT with raw text.""" + rag_tool.add("Raw text using enum.", data_type=DataType.TEXT) + + assert mock_rag_client.add_documents.called + + def test_datatype_directory_enum( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test data_type=DataType.DIRECTORY with existing directory.""" + with TemporaryDirectory() as tmpdir: + (Path(tmpdir) / "file.txt").write_text("Directory file content.") + + rag_tool.add(tmpdir, data_type=DataType.DIRECTORY) + + assert mock_rag_client.add_documents.called + + +class TestInvalidDataType: + """Tests for invalid data_type values.""" + + def test_invalid_string_data_type_raises_error(self, rag_tool: RagTool) -> None: + """Test that invalid string data_type raises ValueError.""" + with pytest.raises(ValueError, match="Invalid data_type"): + rag_tool.add("some content", data_type="invalid_type") + + def test_invalid_data_type_error_message_contains_valid_values( + self, rag_tool: RagTool + ) -> None: + """Test that error message lists valid data_type values.""" + with pytest.raises(ValueError) as exc_info: + rag_tool.add("some content", data_type="not_a_type") + + error_message = str(exc_info.value) + assert "file" in error_message + assert "pdf_file" in error_message + assert "text_file" in error_message + + +class TestFileExistenceValidation: + """Tests for file existence validation.""" + + def test_pdf_file_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent PDF file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent.pdf", data_type="pdf_file") + + def test_text_file_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent text file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent.txt", data_type="text_file") + + def test_csv_file_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent CSV file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent.csv", data_type="csv") + + def test_json_file_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent JSON file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent.json", data_type="json") + + def test_xml_file_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent XML file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent.xml", data_type="xml") + + def test_docx_file_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent DOCX file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent.docx", data_type="docx") + + def test_mdx_file_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent MDX file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add(path="nonexistent.mdx", data_type="mdx") + + def test_directory_not_found_raises_error(self, rag_tool: RagTool) -> None: + """Test that non-existent directory raises ValueError.""" + with pytest.raises(ValueError, match="Directory does not exist"): + rag_tool.add(path="nonexistent/directory", data_type="directory") + + +class TestKeywordArgumentVariants: + """Tests for different keyword argument combinations.""" + + def test_positional_argument_with_data_type( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test positional argument with data_type.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("Positional arg content.") + + rag_tool.add(str(test_file), data_type="text_file") + + assert mock_rag_client.add_documents.called + + def test_path_keyword_with_data_type( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test path keyword argument with data_type.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("Path keyword content.") + + rag_tool.add(path=str(test_file), data_type="text_file") + + assert mock_rag_client.add_documents.called + + def test_file_path_keyword_with_data_type( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test file_path keyword argument with data_type.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("File path keyword content.") + + rag_tool.add(file_path=str(test_file), data_type="text_file") + + assert mock_rag_client.add_documents.called + + def test_directory_path_keyword( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test directory_path keyword argument.""" + with TemporaryDirectory() as tmpdir: + (Path(tmpdir) / "file.txt").write_text("Directory content.") + + rag_tool.add(directory_path=tmpdir) + + assert mock_rag_client.add_documents.called + + +class TestAutoDetection: + """Tests for auto-detection of data type from content.""" + + def test_auto_detect_nonexistent_file_raises_error(self, rag_tool: RagTool) -> None: + """Test that auto-detection raises FileNotFoundError for missing files.""" + with pytest.raises(FileNotFoundError, match="File does not exist"): + rag_tool.add("path/to/document.pdf") + + def test_auto_detect_txt_file( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test auto-detection of .txt file type.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "auto.txt" + test_file.write_text("Auto-detected text file.") + + # No data_type specified - should auto-detect + rag_tool.add(str(test_file)) + + assert mock_rag_client.add_documents.called + + def test_auto_detect_csv_file( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test auto-detection of .csv file type.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "auto.csv" + test_file.write_text("col1,col2\nval1,val2") + + rag_tool.add(str(test_file)) + + assert mock_rag_client.add_documents.called + + def test_auto_detect_json_file( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test auto-detection of .json file type.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "auto.json" + test_file.write_text('{"auto": "detected"}') + + rag_tool.add(str(test_file)) + + assert mock_rag_client.add_documents.called + + def test_auto_detect_directory( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test auto-detection of directory type.""" + with TemporaryDirectory() as tmpdir: + (Path(tmpdir) / "file.txt").write_text("Auto-detected directory.") + + rag_tool.add(tmpdir) + + assert mock_rag_client.add_documents.called + + def test_auto_detect_raw_text( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test auto-detection of raw text (non-file content).""" + rag_tool.add("Just some raw text content") + + assert mock_rag_client.add_documents.called + + +class TestMetadataHandling: + """Tests for metadata handling with data_type.""" + + def test_metadata_passed_to_documents( + self, rag_tool: RagTool, mock_rag_client: MagicMock + ) -> None: + """Test that metadata is properly passed to documents.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("Content with metadata.") + + rag_tool.add( + path=str(test_file), + data_type="text_file", + metadata={"custom_key": "custom_value"}, + ) + + assert mock_rag_client.add_documents.called + call_args = mock_rag_client.add_documents.call_args + documents = call_args.kwargs.get("documents", call_args.args[0] if call_args.args else []) + + # Check that at least one document has the custom metadata + assert any( + doc.get("metadata", {}).get("custom_key") == "custom_value" + for doc in documents + ) \ No newline at end of file diff --git a/lib/crewai/src/crewai/mcp/transports/sse.py b/lib/crewai/src/crewai/mcp/transports/sse.py index ce418c51f..c2184e7d0 100644 --- a/lib/crewai/src/crewai/mcp/transports/sse.py +++ b/lib/crewai/src/crewai/mcp/transports/sse.py @@ -66,7 +66,6 @@ class SSETransport(BaseTransport): self._transport_context = sse_client( self.url, headers=self.headers if self.headers else None, - terminate_on_close=True, ) read, write = await self._transport_context.__aenter__() diff --git a/lib/crewai/tests/mcp/test_sse_transport.py b/lib/crewai/tests/mcp/test_sse_transport.py new file mode 100644 index 000000000..a714c6ce7 --- /dev/null +++ b/lib/crewai/tests/mcp/test_sse_transport.py @@ -0,0 +1,22 @@ +"""Tests for SSE transport.""" + +import pytest + +from crewai.mcp.transports.sse import SSETransport + + +@pytest.mark.asyncio +async def test_sse_transport_connect_does_not_pass_invalid_args(): + """Test that SSETransport.connect() doesn't pass invalid args to sse_client. + + The sse_client function does not accept terminate_on_close parameter. + """ + transport = SSETransport( + url="http://localhost:9999/sse", + headers={"Authorization": "Bearer test"}, + ) + + with pytest.raises(ConnectionError) as exc_info: + await transport.connect() + + assert "unexpected keyword argument" not in str(exc_info.value) \ No newline at end of file diff --git a/uv.lock b/uv.lock index 7025932ac..6029c59c1 100644 --- a/uv.lock +++ b/uv.lock @@ -1225,7 +1225,7 @@ dependencies = [ { name = "crewai" }, { name = "docker" }, { name = "lancedb" }, - { name = "pypdf" }, + { name = "pymupdf" }, { name = "python-docx" }, { name = "pytube" }, { name = "requests" }, @@ -1382,8 +1382,8 @@ requires-dist = [ { name = "psycopg2-binary", marker = "extra == 'postgresql'", specifier = ">=2.9.10" }, { name = "pygithub", marker = "extra == 'github'", specifier = "==1.59.1" }, { name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.13" }, + { name = "pymupdf", specifier = ">=1.26.6" }, { name = "pymysql", marker = "extra == 'mysql'", specifier = ">=1.1.1" }, - { name = "pypdf", specifier = ">=5.9.0" }, { name = "python-docx", specifier = ">=1.2.0" }, { name = "python-docx", marker = "extra == 'rag'", specifier = ">=1.1.0" }, { name = "pytube", specifier = ">=15.0.0" }, @@ -2224,6 +2224,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" }, { url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" }, { url = "https://files.pythonhosted.org/packages/a1/8d/88f3ebd2bc96bf7747093696f4335a0a8a4c5acfcf1b757717c0d2474ba3/greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f", size = 1137126, upload-time = "2025-08-07T13:18:20.239Z" }, + { url = "https://files.pythonhosted.org/packages/f1/29/74242b7d72385e29bcc5563fba67dad94943d7cd03552bac320d597f29b2/greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7", size = 1544904, upload-time = "2025-11-04T12:42:04.763Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e2/1572b8eeab0f77df5f6729d6ab6b141e4a84ee8eb9bc8c1e7918f94eda6d/greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8", size = 1611228, upload-time = "2025-11-04T12:42:08.423Z" }, { url = "https://files.pythonhosted.org/packages/d6/6f/b60b0291d9623c496638c582297ead61f43c4b72eef5e9c926ef4565ec13/greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c", size = 298654, upload-time = "2025-08-07T13:50:00.469Z" }, { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, @@ -2233,6 +2235,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, { url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" }, + { url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" }, + { url = "https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" }, { url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" }, { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, @@ -2242,6 +2246,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, @@ -2251,6 +2257,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, + { url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" }, + { url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" }, { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, ] @@ -5970,6 +5978,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/7c/42f0b6997324023e94939f8f32b9a8dd928499f4b5d7b4412905368686b5/pymongo-4.15.3-cp313-cp313-win_arm64.whl", hash = "sha256:fb384623ece34db78d445dd578a52d28b74e8319f4d9535fbaff79d0eae82b3d", size = 944300, upload-time = "2025-10-07T21:56:58.969Z" }, ] +[[package]] +name = "pymupdf" +version = "1.26.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/d7/a6f0e03a117fa2ad79c4b898203bb212b17804f92558a6a339298faca7bb/pymupdf-1.26.6.tar.gz", hash = "sha256:a2b4531cd4ab36d6f1f794bb6d3c33b49bda22f36d58bb1f3e81cbc10183bd2b", size = 84322494, upload-time = "2025-11-05T15:20:46.786Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/5c/dec354eee5fe4966c715f33818ed4193e0e6c986cf8484de35b6c167fb8e/pymupdf-1.26.6-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:e46f320a136ad55e5219e8f0f4061bdf3e4c12b126d2740d5a49f73fae7ea176", size = 23178988, upload-time = "2025-11-05T14:31:19.834Z" }, + { url = "https://files.pythonhosted.org/packages/ec/a0/11adb742d18142bd623556cd3b5d64649816decc5eafd30efc9498657e76/pymupdf-1.26.6-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:6844cd2396553c0fa06de4869d5d5ecb1260e6fc3b9d85abe8fa35f14dd9d688", size = 22469764, upload-time = "2025-11-05T14:32:34.654Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c8/377cf20e31f58d4c243bfcf2d3cb7466d5b97003b10b9f1161f11eb4a994/pymupdf-1.26.6-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:617ba69e02c44f0da1c0e039ea4a26cf630849fd570e169c71daeb8ac52a81d6", size = 23502227, upload-time = "2025-11-06T11:03:56.934Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/6e02e3d84b32c137c71a0a3dcdba8f2f6e9950619a3bc272245c7c06a051/pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7777d0b7124c2ebc94849536b6a1fb85d158df3b9d873935e63036559391534c", size = 24115381, upload-time = "2025-11-05T14:33:54.338Z" }, + { url = "https://files.pythonhosted.org/packages/ab/9d/30f7fcb3776bfedde66c06297960debe4883b1667294a1ee9426c942e94d/pymupdf-1.26.6-cp310-abi3-win32.whl", hash = "sha256:8f3ef05befc90ca6bb0f12983200a7048d5bff3e1c1edef1bb3de60b32cb5274", size = 17203613, upload-time = "2025-11-05T17:19:47.494Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e8/989f4eaa369c7166dc24f0eaa3023f13788c40ff1b96701f7047421554a8/pymupdf-1.26.6-cp310-abi3-win_amd64.whl", hash = "sha256:ce02ca96ed0d1acfd00331a4d41a34c98584d034155b06fd4ec0f051718de7ba", size = 18405680, upload-time = "2025-11-05T14:34:48.672Z" }, +] + [[package]] name = "pymysql" version = "1.1.2"