Merge branch 'main' into gl/fix/ensure-complete-tool-signature

This commit is contained in:
Greyson LaLonde
2025-11-27 02:19:18 -05:00
committed by GitHub
10 changed files with 755 additions and 81 deletions

View File

@@ -16,9 +16,9 @@ dependencies = [
"lancedb>=0.5.4", "lancedb>=0.5.4",
"tiktoken>=0.8.0", "tiktoken>=0.8.0",
"beautifulsoup4>=4.13.4", "beautifulsoup4>=4.13.4",
"pypdf>=5.9.0",
"python-docx>=1.2.0", "python-docx>=1.2.0",
"youtube-transcript-api>=1.2.2", "youtube-transcript-api>=1.2.2",
"pymupdf>=1.26.6",
] ]

View File

@@ -3,8 +3,7 @@
from __future__ import annotations from __future__ import annotations
import hashlib import hashlib
from pathlib import Path from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast
import uuid import uuid
from crewai.rag.config.types import RagConfigType from crewai.rag.config.types import RagConfigType
@@ -19,15 +18,13 @@ from typing_extensions import TypeIs, Unpack
from crewai_tools.rag.data_types import DataType from crewai_tools.rag.data_types import DataType
from crewai_tools.rag.misc import sanitize_metadata_for_chromadb from crewai_tools.rag.misc import sanitize_metadata_for_chromadb
from crewai_tools.tools.rag.rag_tool import Adapter from crewai_tools.tools.rag.rag_tool import Adapter
from crewai_tools.tools.rag.types import AddDocumentParams, ContentItem
if TYPE_CHECKING: if TYPE_CHECKING:
from crewai.rag.qdrant.config import QdrantConfig from crewai.rag.qdrant.config import QdrantConfig
ContentItem: TypeAlias = str | Path | dict[str, Any]
def _is_qdrant_config(config: Any) -> TypeIs[QdrantConfig]: def _is_qdrant_config(config: Any) -> TypeIs[QdrantConfig]:
"""Check if config is a QdrantConfig using safe duck typing. """Check if config is a QdrantConfig using safe duck typing.
@@ -46,19 +43,6 @@ def _is_qdrant_config(config: Any) -> TypeIs[QdrantConfig]:
return False return False
class AddDocumentParams(TypedDict, total=False):
"""Parameters for adding documents to the RAG system."""
data_type: DataType
metadata: dict[str, Any]
website: str
url: str
file_path: str | Path
github_url: str
youtube_url: str
directory_path: str | Path
class CrewAIRagAdapter(Adapter): class CrewAIRagAdapter(Adapter):
"""Adapter that uses CrewAI's native RAG system. """Adapter that uses CrewAI's native RAG system.
@@ -131,13 +115,26 @@ class CrewAIRagAdapter(Adapter):
def add(self, *args: ContentItem, **kwargs: Unpack[AddDocumentParams]) -> None: def add(self, *args: ContentItem, **kwargs: Unpack[AddDocumentParams]) -> None:
"""Add content to the knowledge base. """Add content to the knowledge base.
This method handles various input types and converts them to documents
for the vector database. It supports the data_type parameter for
compatibility with existing tools.
Args: Args:
*args: Content items to add (strings, paths, or document dicts) *args: Content items to add (strings, paths, or document dicts)
**kwargs: Additional parameters including data_type, metadata, etc. **kwargs: Additional parameters including:
- data_type: DataType enum or string (e.g., "file", "pdf_file", "text")
- path: Path to file or directory (alternative to positional arg)
- file_path: Alias for path
- metadata: Additional metadata to attach to documents
- url: URL to fetch content from
- website: Website URL to scrape
- github_url: GitHub repository URL
- youtube_url: YouTube video URL
- directory_path: Path to directory
Examples:
rag_tool.add("path/to/document.pdf", data_type=DataType.PDF_FILE)
rag_tool.add(path="path/to/document.pdf", data_type="file")
rag_tool.add(file_path="path/to/document.pdf", data_type="pdf_file")
rag_tool.add("path/to/document.pdf") # auto-detects PDF
""" """
import os import os
@@ -146,10 +143,54 @@ class CrewAIRagAdapter(Adapter):
from crewai_tools.rag.source_content import SourceContent from crewai_tools.rag.source_content import SourceContent
documents: list[BaseRecord] = [] documents: list[BaseRecord] = []
data_type: DataType | None = kwargs.get("data_type") raw_data_type = kwargs.get("data_type")
base_metadata: dict[str, Any] = kwargs.get("metadata", {}) base_metadata: dict[str, Any] = kwargs.get("metadata", {})
for arg in args: data_type: DataType | None = None
if raw_data_type is not None:
if isinstance(raw_data_type, DataType):
if raw_data_type != DataType.FILE:
data_type = raw_data_type
elif isinstance(raw_data_type, str):
if raw_data_type != "file":
try:
data_type = DataType(raw_data_type)
except ValueError:
raise ValueError(
f"Invalid data_type: '{raw_data_type}'. "
f"Valid values are: 'file' (auto-detect), or one of: "
f"{', '.join(dt.value for dt in DataType)}"
) from None
content_items: list[ContentItem] = list(args)
path_value = kwargs.get("path") or kwargs.get("file_path")
if path_value is not None:
content_items.append(path_value)
if url := kwargs.get("url"):
content_items.append(url)
if website := kwargs.get("website"):
content_items.append(website)
if github_url := kwargs.get("github_url"):
content_items.append(github_url)
if youtube_url := kwargs.get("youtube_url"):
content_items.append(youtube_url)
if directory_path := kwargs.get("directory_path"):
content_items.append(directory_path)
file_extensions = {
".pdf",
".txt",
".csv",
".json",
".xml",
".docx",
".mdx",
".md",
}
for arg in content_items:
source_ref: str source_ref: str
if isinstance(arg, dict): if isinstance(arg, dict):
source_ref = str(arg.get("source", arg.get("content", ""))) source_ref = str(arg.get("source", arg.get("content", "")))
@@ -157,6 +198,14 @@ class CrewAIRagAdapter(Adapter):
source_ref = str(arg) source_ref = str(arg)
if not data_type: if not data_type:
ext = os.path.splitext(source_ref)[1].lower()
is_url = source_ref.startswith(("http://", "https://", "file://"))
if (
ext in file_extensions
and not is_url
and not os.path.isfile(source_ref)
):
raise FileNotFoundError(f"File does not exist: {source_ref}")
data_type = DataTypes.from_content(source_ref) data_type = DataTypes.from_content(source_ref)
if data_type == DataType.DIRECTORY: if data_type == DataType.DIRECTORY:

View File

@@ -1,6 +1,8 @@
from enum import Enum from enum import Enum
from importlib import import_module
import os import os
from pathlib import Path from pathlib import Path
from typing import cast
from urllib.parse import urlparse from urllib.parse import urlparse
from crewai_tools.rag.base_loader import BaseLoader from crewai_tools.rag.base_loader import BaseLoader
@@ -8,6 +10,7 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker
class DataType(str, Enum): class DataType(str, Enum):
FILE = "file"
PDF_FILE = "pdf_file" PDF_FILE = "pdf_file"
TEXT_FILE = "text_file" TEXT_FILE = "text_file"
CSV = "csv" CSV = "csv"
@@ -15,22 +18,14 @@ class DataType(str, Enum):
XML = "xml" XML = "xml"
DOCX = "docx" DOCX = "docx"
MDX = "mdx" MDX = "mdx"
# Database types
MYSQL = "mysql" MYSQL = "mysql"
POSTGRES = "postgres" POSTGRES = "postgres"
# Repository types
GITHUB = "github" GITHUB = "github"
DIRECTORY = "directory" DIRECTORY = "directory"
# Web types
WEBSITE = "website" WEBSITE = "website"
DOCS_SITE = "docs_site" DOCS_SITE = "docs_site"
YOUTUBE_VIDEO = "youtube_video" YOUTUBE_VIDEO = "youtube_video"
YOUTUBE_CHANNEL = "youtube_channel" YOUTUBE_CHANNEL = "youtube_channel"
# Raw types
TEXT = "text" TEXT = "text"
def get_chunker(self) -> BaseChunker: def get_chunker(self) -> BaseChunker:
@@ -63,13 +58,11 @@ class DataType(str, Enum):
try: try:
module = import_module(module_path) module = import_module(module_path)
return getattr(module, class_name)() return cast(BaseChunker, getattr(module, class_name)())
except Exception as e: except Exception as e:
raise ValueError(f"Error loading chunker for {self}: {e}") from e raise ValueError(f"Error loading chunker for {self}: {e}") from e
def get_loader(self) -> BaseLoader: def get_loader(self) -> BaseLoader:
from importlib import import_module
loaders = { loaders = {
DataType.PDF_FILE: ("pdf_loader", "PDFLoader"), DataType.PDF_FILE: ("pdf_loader", "PDFLoader"),
DataType.TEXT_FILE: ("text_loader", "TextFileLoader"), DataType.TEXT_FILE: ("text_loader", "TextFileLoader"),
@@ -98,7 +91,7 @@ class DataType(str, Enum):
module_path = f"crewai_tools.rag.loaders.{module_name}" module_path = f"crewai_tools.rag.loaders.{module_name}"
try: try:
module = import_module(module_path) module = import_module(module_path)
return getattr(module, class_name)() return cast(BaseLoader, getattr(module, class_name)())
except Exception as e: except Exception as e:
raise ValueError(f"Error loading loader for {self}: {e}") from e raise ValueError(f"Error loading loader for {self}: {e}") from e

View File

@@ -2,70 +2,112 @@
import os import os
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any, cast
from urllib.parse import urlparse
import urllib.request
from crewai_tools.rag.base_loader import BaseLoader, LoaderResult from crewai_tools.rag.base_loader import BaseLoader, LoaderResult
from crewai_tools.rag.source_content import SourceContent from crewai_tools.rag.source_content import SourceContent
class PDFLoader(BaseLoader): class PDFLoader(BaseLoader):
"""Loader for PDF files.""" """Loader for PDF files and URLs."""
def load(self, source: SourceContent, **kwargs) -> LoaderResult: # type: ignore[override] @staticmethod
"""Load and extract text from a PDF file. def _is_url(path: str) -> bool:
"""Check if the path is a URL."""
try:
parsed = urlparse(path)
return parsed.scheme in ("http", "https")
except Exception:
return False
@staticmethod
def _download_pdf(url: str) -> bytes:
"""Download PDF content from a URL.
Args: Args:
source: The source content containing the PDF file path url: The URL to download from.
Returns: Returns:
LoaderResult with extracted text content The PDF content as bytes.
Raises: Raises:
FileNotFoundError: If the PDF file doesn't exist ValueError: If the download fails.
ImportError: If required PDF libraries aren't installed """
try:
with urllib.request.urlopen(url, timeout=30) as response: # noqa: S310
return cast(bytes, response.read())
except Exception as e:
raise ValueError(f"Failed to download PDF from {url}: {e!s}") from e
def load(self, source: SourceContent, **kwargs: Any) -> LoaderResult: # type: ignore[override]
"""Load and extract text from a PDF file or URL.
Args:
source: The source content containing the PDF file path or URL.
Returns:
LoaderResult with extracted text content.
Raises:
FileNotFoundError: If the PDF file doesn't exist.
ImportError: If required PDF libraries aren't installed.
ValueError: If the PDF cannot be read or downloaded.
""" """
try: try:
import pypdf import pymupdf # type: ignore[import-untyped]
except ImportError: except ImportError as e:
try: raise ImportError(
import PyPDF2 as pypdf # type: ignore[import-not-found,no-redef] # noqa: N813 "PDF support requires pymupdf. Install with: uv add pymupdf"
except ImportError as e: ) from e
raise ImportError(
"PDF support requires pypdf or PyPDF2. Install with: uv add pypdf"
) from e
file_path = source.source file_path = source.source
is_url = self._is_url(file_path)
if not os.path.isfile(file_path): if is_url:
raise FileNotFoundError(f"PDF file not found: {file_path}") source_name = Path(urlparse(file_path).path).name or "downloaded.pdf"
else:
source_name = Path(file_path).name
text_content = [] text_content: list[str] = []
metadata: dict[str, Any] = { metadata: dict[str, Any] = {
"source": str(file_path), "source": file_path,
"file_name": Path(file_path).name, "file_name": source_name,
"file_type": "pdf", "file_type": "pdf",
} }
try: try:
with open(file_path, "rb") as file: if is_url:
pdf_reader = pypdf.PdfReader(file) pdf_bytes = self._download_pdf(file_path)
metadata["num_pages"] = len(pdf_reader.pages) doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
else:
if not os.path.isfile(file_path):
raise FileNotFoundError(f"PDF file not found: {file_path}")
doc = pymupdf.open(file_path)
for page_num, page in enumerate(pdf_reader.pages, 1): metadata["num_pages"] = len(doc)
page_text = page.extract_text()
if page_text.strip(): for page_num, page in enumerate(doc, 1):
text_content.append(f"Page {page_num}:\n{page_text}") page_text = page.get_text()
if page_text.strip():
text_content.append(f"Page {page_num}:\n{page_text}")
doc.close()
except FileNotFoundError:
raise
except Exception as e: except Exception as e:
raise ValueError(f"Error reading PDF file {file_path}: {e!s}") from e raise ValueError(f"Error reading PDF from {file_path}: {e!s}") from e
if not text_content: if not text_content:
content = f"[PDF file with no extractable text: {Path(file_path).name}]" content = f"[PDF file with no extractable text: {source_name}]"
else: else:
content = "\n\n".join(text_content) content = "\n\n".join(text_content)
return LoaderResult( return LoaderResult(
content=content, content=content,
source=str(file_path), source=file_path,
metadata=metadata, metadata=metadata,
doc_id=self.generate_doc_id(source_ref=str(file_path), content=content), doc_id=self.generate_doc_id(source_ref=file_path, content=content),
) )

View File

@@ -14,9 +14,14 @@ from pydantic import (
field_validator, field_validator,
model_validator, model_validator,
) )
from typing_extensions import Self from typing_extensions import Self, Unpack
from crewai_tools.tools.rag.types import RagToolConfig, VectorDbConfig from crewai_tools.tools.rag.types import (
AddDocumentParams,
ContentItem,
RagToolConfig,
VectorDbConfig,
)
def _validate_embedding_config( def _validate_embedding_config(
@@ -72,6 +77,8 @@ def _validate_embedding_config(
class Adapter(BaseModel, ABC): class Adapter(BaseModel, ABC):
"""Abstract base class for RAG adapters."""
model_config = ConfigDict(arbitrary_types_allowed=True) model_config = ConfigDict(arbitrary_types_allowed=True)
@abstractmethod @abstractmethod
@@ -86,8 +93,8 @@ class Adapter(BaseModel, ABC):
@abstractmethod @abstractmethod
def add( def add(
self, self,
*args: Any, *args: ContentItem,
**kwargs: Any, **kwargs: Unpack[AddDocumentParams],
) -> None: ) -> None:
"""Add content to the knowledge base.""" """Add content to the knowledge base."""
@@ -102,7 +109,11 @@ class RagTool(BaseTool):
) -> str: ) -> str:
raise NotImplementedError raise NotImplementedError
def add(self, *args: Any, **kwargs: Any) -> None: def add(
self,
*args: ContentItem,
**kwargs: Unpack[AddDocumentParams],
) -> None:
raise NotImplementedError raise NotImplementedError
name: str = "Knowledge base" name: str = "Knowledge base"
@@ -207,9 +218,34 @@ class RagTool(BaseTool):
def add( def add(
self, self,
*args: Any, *args: ContentItem,
**kwargs: Any, **kwargs: Unpack[AddDocumentParams],
) -> None: ) -> None:
"""Add content to the knowledge base.
Args:
*args: Content items to add (strings, paths, or document dicts)
data_type: DataType enum or string (e.g., "file", "pdf_file", "text")
path: Path to file or directory, alias to positional arg
file_path: Alias for path
metadata: Additional metadata to attach to documents
url: URL to fetch content from
website: Website URL to scrape
github_url: GitHub repository URL
youtube_url: YouTube video URL
directory_path: Path to directory
Examples:
rag_tool.add("path/to/document.pdf", data_type=DataType.PDF_FILE)
# Keyword argument (documented API)
rag_tool.add(path="path/to/document.pdf", data_type="file")
rag_tool.add(file_path="path/to/document.pdf", data_type="pdf_file")
# Auto-detect type from extension
rag_tool.add("path/to/document.pdf") # auto-detects PDF
"""
self.adapter.add(*args, **kwargs) self.adapter.add(*args, **kwargs)
def _run( def _run(

View File

@@ -1,10 +1,50 @@
"""Type definitions for RAG tool configuration.""" """Type definitions for RAG tool configuration."""
from typing import Any, Literal from pathlib import Path
from typing import Any, Literal, TypeAlias
from crewai.rag.embeddings.types import ProviderSpec from crewai.rag.embeddings.types import ProviderSpec
from typing_extensions import TypedDict from typing_extensions import TypedDict
from crewai_tools.rag.data_types import DataType
DataTypeStr: TypeAlias = Literal[
"file",
"pdf_file",
"text_file",
"csv",
"json",
"xml",
"docx",
"mdx",
"mysql",
"postgres",
"github",
"directory",
"website",
"docs_site",
"youtube_video",
"youtube_channel",
"text",
]
ContentItem: TypeAlias = str | Path | dict[str, Any]
class AddDocumentParams(TypedDict, total=False):
"""Parameters for adding documents to the RAG system."""
data_type: DataType | DataTypeStr
metadata: dict[str, Any]
path: str | Path
file_path: str | Path
website: str
url: str
github_url: str
youtube_url: str
directory_path: str | Path
class VectorDbConfig(TypedDict): class VectorDbConfig(TypedDict):
"""Configuration for vector database provider. """Configuration for vector database provider.

View File

@@ -0,0 +1,471 @@
"""Tests for RagTool.add() method with various data_type values."""
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import MagicMock, Mock, patch
import pytest
from crewai_tools.rag.data_types import DataType
from crewai_tools.tools.rag.rag_tool import RagTool
@pytest.fixture
def mock_rag_client() -> MagicMock:
"""Create a mock RAG client for testing."""
mock_client = MagicMock()
mock_client.get_or_create_collection = MagicMock(return_value=None)
mock_client.add_documents = MagicMock(return_value=None)
mock_client.search = MagicMock(return_value=[])
return mock_client
@pytest.fixture
def rag_tool(mock_rag_client: MagicMock) -> RagTool:
"""Create a RagTool instance with mocked client."""
with (
patch(
"crewai_tools.adapters.crewai_rag_adapter.get_rag_client",
return_value=mock_rag_client,
),
patch(
"crewai_tools.adapters.crewai_rag_adapter.create_client",
return_value=mock_rag_client,
),
):
return RagTool()
class TestDataTypeFileAlias:
"""Tests for data_type='file' alias."""
def test_file_alias_with_existing_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that data_type='file' works with existing files."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Test content for file alias.")
rag_tool.add(path=str(test_file), data_type="file")
assert mock_rag_client.add_documents.called
def test_file_alias_with_nonexistent_file_raises_error(
self, rag_tool: RagTool
) -> None:
"""Test that data_type='file' raises FileNotFoundError for missing files."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent/path/to/file.pdf", data_type="file")
def test_file_alias_with_path_keyword(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that path keyword argument works with data_type='file'."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "document.txt"
test_file.write_text("Content via path keyword.")
rag_tool.add(data_type="file", path=str(test_file))
assert mock_rag_client.add_documents.called
def test_file_alias_with_file_path_keyword(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that file_path keyword argument works with data_type='file'."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "document.txt"
test_file.write_text("Content via file_path keyword.")
rag_tool.add(data_type="file", file_path=str(test_file))
assert mock_rag_client.add_documents.called
class TestDataTypeStringValues:
"""Tests for data_type as string values matching DataType enum."""
def test_pdf_file_string(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type='pdf_file' with existing PDF file."""
with TemporaryDirectory() as tmpdir:
# Create a minimal valid PDF file
test_file = Path(tmpdir) / "test.pdf"
test_file.write_bytes(
b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\ntrailer\n"
b"<<\n/Root 1 0 R\n>>\n%%EOF"
)
# Mock the PDF loader to avoid actual PDF parsing
with patch(
"crewai_tools.adapters.crewai_rag_adapter.DataType.get_loader"
) as mock_loader:
mock_loader_instance = MagicMock()
mock_loader_instance.load.return_value = MagicMock(
content="PDF content", metadata={}, doc_id="test-id"
)
mock_loader.return_value = mock_loader_instance
rag_tool.add(path=str(test_file), data_type="pdf_file")
assert mock_rag_client.add_documents.called
def test_text_file_string(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type='text_file' with existing text file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Plain text content.")
rag_tool.add(path=str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_csv_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='csv' with existing CSV file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.csv"
test_file.write_text("name,value\nfoo,1\nbar,2")
rag_tool.add(path=str(test_file), data_type="csv")
assert mock_rag_client.add_documents.called
def test_json_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='json' with existing JSON file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.json"
test_file.write_text('{"key": "value", "items": [1, 2, 3]}')
rag_tool.add(path=str(test_file), data_type="json")
assert mock_rag_client.add_documents.called
def test_xml_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='xml' with existing XML file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.xml"
test_file.write_text('<?xml version="1.0"?><root><item>value</item></root>')
rag_tool.add(path=str(test_file), data_type="xml")
assert mock_rag_client.add_documents.called
def test_mdx_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='mdx' with existing MDX file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.mdx"
test_file.write_text("# Heading\n\nSome markdown content.")
rag_tool.add(path=str(test_file), data_type="mdx")
assert mock_rag_client.add_documents.called
def test_text_string(self, rag_tool: RagTool, mock_rag_client: MagicMock) -> None:
"""Test data_type='text' with raw text content."""
rag_tool.add("This is raw text content.", data_type="text")
assert mock_rag_client.add_documents.called
def test_directory_string(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type='directory' with existing directory."""
with TemporaryDirectory() as tmpdir:
# Create some files in the directory
(Path(tmpdir) / "file1.txt").write_text("Content 1")
(Path(tmpdir) / "file2.txt").write_text("Content 2")
rag_tool.add(path=tmpdir, data_type="directory")
assert mock_rag_client.add_documents.called
class TestDataTypeEnumValues:
"""Tests for data_type as DataType enum values."""
def test_datatype_file_enum_with_existing_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.FILE with existing file (auto-detect)."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("File enum auto-detect content.")
rag_tool.add(str(test_file), data_type=DataType.FILE)
assert mock_rag_client.add_documents.called
def test_datatype_file_enum_with_nonexistent_file_raises_error(
self, rag_tool: RagTool
) -> None:
"""Test data_type=DataType.FILE raises FileNotFoundError for missing files."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add("nonexistent/file.pdf", data_type=DataType.FILE)
def test_datatype_pdf_file_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.PDF_FILE with existing file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.pdf"
test_file.write_bytes(
b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\ntrailer\n"
b"<<\n/Root 1 0 R\n>>\n%%EOF"
)
with patch(
"crewai_tools.adapters.crewai_rag_adapter.DataType.get_loader"
) as mock_loader:
mock_loader_instance = MagicMock()
mock_loader_instance.load.return_value = MagicMock(
content="PDF content", metadata={}, doc_id="test-id"
)
mock_loader.return_value = mock_loader_instance
rag_tool.add(str(test_file), data_type=DataType.PDF_FILE)
assert mock_rag_client.add_documents.called
def test_datatype_text_file_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.TEXT_FILE with existing file."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Text file content.")
rag_tool.add(str(test_file), data_type=DataType.TEXT_FILE)
assert mock_rag_client.add_documents.called
def test_datatype_text_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.TEXT with raw text."""
rag_tool.add("Raw text using enum.", data_type=DataType.TEXT)
assert mock_rag_client.add_documents.called
def test_datatype_directory_enum(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test data_type=DataType.DIRECTORY with existing directory."""
with TemporaryDirectory() as tmpdir:
(Path(tmpdir) / "file.txt").write_text("Directory file content.")
rag_tool.add(tmpdir, data_type=DataType.DIRECTORY)
assert mock_rag_client.add_documents.called
class TestInvalidDataType:
"""Tests for invalid data_type values."""
def test_invalid_string_data_type_raises_error(self, rag_tool: RagTool) -> None:
"""Test that invalid string data_type raises ValueError."""
with pytest.raises(ValueError, match="Invalid data_type"):
rag_tool.add("some content", data_type="invalid_type")
def test_invalid_data_type_error_message_contains_valid_values(
self, rag_tool: RagTool
) -> None:
"""Test that error message lists valid data_type values."""
with pytest.raises(ValueError) as exc_info:
rag_tool.add("some content", data_type="not_a_type")
error_message = str(exc_info.value)
assert "file" in error_message
assert "pdf_file" in error_message
assert "text_file" in error_message
class TestFileExistenceValidation:
"""Tests for file existence validation."""
def test_pdf_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent PDF file raises FileNotFoundError."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent.pdf", data_type="pdf_file")
def test_text_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent text file raises FileNotFoundError."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent.txt", data_type="text_file")
def test_csv_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent CSV file raises FileNotFoundError."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent.csv", data_type="csv")
def test_json_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent JSON file raises FileNotFoundError."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent.json", data_type="json")
def test_xml_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent XML file raises FileNotFoundError."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent.xml", data_type="xml")
def test_docx_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent DOCX file raises FileNotFoundError."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent.docx", data_type="docx")
def test_mdx_file_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent MDX file raises FileNotFoundError."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add(path="nonexistent.mdx", data_type="mdx")
def test_directory_not_found_raises_error(self, rag_tool: RagTool) -> None:
"""Test that non-existent directory raises ValueError."""
with pytest.raises(ValueError, match="Directory does not exist"):
rag_tool.add(path="nonexistent/directory", data_type="directory")
class TestKeywordArgumentVariants:
"""Tests for different keyword argument combinations."""
def test_positional_argument_with_data_type(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test positional argument with data_type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Positional arg content.")
rag_tool.add(str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_path_keyword_with_data_type(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test path keyword argument with data_type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Path keyword content.")
rag_tool.add(path=str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_file_path_keyword_with_data_type(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test file_path keyword argument with data_type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("File path keyword content.")
rag_tool.add(file_path=str(test_file), data_type="text_file")
assert mock_rag_client.add_documents.called
def test_directory_path_keyword(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test directory_path keyword argument."""
with TemporaryDirectory() as tmpdir:
(Path(tmpdir) / "file.txt").write_text("Directory content.")
rag_tool.add(directory_path=tmpdir)
assert mock_rag_client.add_documents.called
class TestAutoDetection:
"""Tests for auto-detection of data type from content."""
def test_auto_detect_nonexistent_file_raises_error(self, rag_tool: RagTool) -> None:
"""Test that auto-detection raises FileNotFoundError for missing files."""
with pytest.raises(FileNotFoundError, match="File does not exist"):
rag_tool.add("path/to/document.pdf")
def test_auto_detect_txt_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of .txt file type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "auto.txt"
test_file.write_text("Auto-detected text file.")
# No data_type specified - should auto-detect
rag_tool.add(str(test_file))
assert mock_rag_client.add_documents.called
def test_auto_detect_csv_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of .csv file type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "auto.csv"
test_file.write_text("col1,col2\nval1,val2")
rag_tool.add(str(test_file))
assert mock_rag_client.add_documents.called
def test_auto_detect_json_file(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of .json file type."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "auto.json"
test_file.write_text('{"auto": "detected"}')
rag_tool.add(str(test_file))
assert mock_rag_client.add_documents.called
def test_auto_detect_directory(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of directory type."""
with TemporaryDirectory() as tmpdir:
(Path(tmpdir) / "file.txt").write_text("Auto-detected directory.")
rag_tool.add(tmpdir)
assert mock_rag_client.add_documents.called
def test_auto_detect_raw_text(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test auto-detection of raw text (non-file content)."""
rag_tool.add("Just some raw text content")
assert mock_rag_client.add_documents.called
class TestMetadataHandling:
"""Tests for metadata handling with data_type."""
def test_metadata_passed_to_documents(
self, rag_tool: RagTool, mock_rag_client: MagicMock
) -> None:
"""Test that metadata is properly passed to documents."""
with TemporaryDirectory() as tmpdir:
test_file = Path(tmpdir) / "test.txt"
test_file.write_text("Content with metadata.")
rag_tool.add(
path=str(test_file),
data_type="text_file",
metadata={"custom_key": "custom_value"},
)
assert mock_rag_client.add_documents.called
call_args = mock_rag_client.add_documents.call_args
documents = call_args.kwargs.get("documents", call_args.args[0] if call_args.args else [])
# Check that at least one document has the custom metadata
assert any(
doc.get("metadata", {}).get("custom_key") == "custom_value"
for doc in documents
)

View File

@@ -66,7 +66,6 @@ class SSETransport(BaseTransport):
self._transport_context = sse_client( self._transport_context = sse_client(
self.url, self.url,
headers=self.headers if self.headers else None, headers=self.headers if self.headers else None,
terminate_on_close=True,
) )
read, write = await self._transport_context.__aenter__() read, write = await self._transport_context.__aenter__()

View File

@@ -0,0 +1,22 @@
"""Tests for SSE transport."""
import pytest
from crewai.mcp.transports.sse import SSETransport
@pytest.mark.asyncio
async def test_sse_transport_connect_does_not_pass_invalid_args():
"""Test that SSETransport.connect() doesn't pass invalid args to sse_client.
The sse_client function does not accept terminate_on_close parameter.
"""
transport = SSETransport(
url="http://localhost:9999/sse",
headers={"Authorization": "Bearer test"},
)
with pytest.raises(ConnectionError) as exc_info:
await transport.connect()
assert "unexpected keyword argument" not in str(exc_info.value)

26
uv.lock generated
View File

@@ -1225,7 +1225,7 @@ dependencies = [
{ name = "crewai" }, { name = "crewai" },
{ name = "docker" }, { name = "docker" },
{ name = "lancedb" }, { name = "lancedb" },
{ name = "pypdf" }, { name = "pymupdf" },
{ name = "python-docx" }, { name = "python-docx" },
{ name = "pytube" }, { name = "pytube" },
{ name = "requests" }, { name = "requests" },
@@ -1382,8 +1382,8 @@ requires-dist = [
{ name = "psycopg2-binary", marker = "extra == 'postgresql'", specifier = ">=2.9.10" }, { name = "psycopg2-binary", marker = "extra == 'postgresql'", specifier = ">=2.9.10" },
{ name = "pygithub", marker = "extra == 'github'", specifier = "==1.59.1" }, { name = "pygithub", marker = "extra == 'github'", specifier = "==1.59.1" },
{ name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.13" }, { name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.13" },
{ name = "pymupdf", specifier = ">=1.26.6" },
{ name = "pymysql", marker = "extra == 'mysql'", specifier = ">=1.1.1" }, { name = "pymysql", marker = "extra == 'mysql'", specifier = ">=1.1.1" },
{ name = "pypdf", specifier = ">=5.9.0" },
{ name = "python-docx", specifier = ">=1.2.0" }, { name = "python-docx", specifier = ">=1.2.0" },
{ name = "python-docx", marker = "extra == 'rag'", specifier = ">=1.1.0" }, { name = "python-docx", marker = "extra == 'rag'", specifier = ">=1.1.0" },
{ name = "pytube", specifier = ">=15.0.0" }, { name = "pytube", specifier = ">=15.0.0" },
@@ -2224,6 +2224,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" }, { url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" },
{ url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" }, { url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" },
{ url = "https://files.pythonhosted.org/packages/a1/8d/88f3ebd2bc96bf7747093696f4335a0a8a4c5acfcf1b757717c0d2474ba3/greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f", size = 1137126, upload-time = "2025-08-07T13:18:20.239Z" }, { url = "https://files.pythonhosted.org/packages/a1/8d/88f3ebd2bc96bf7747093696f4335a0a8a4c5acfcf1b757717c0d2474ba3/greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f", size = 1137126, upload-time = "2025-08-07T13:18:20.239Z" },
{ url = "https://files.pythonhosted.org/packages/f1/29/74242b7d72385e29bcc5563fba67dad94943d7cd03552bac320d597f29b2/greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7", size = 1544904, upload-time = "2025-11-04T12:42:04.763Z" },
{ url = "https://files.pythonhosted.org/packages/c8/e2/1572b8eeab0f77df5f6729d6ab6b141e4a84ee8eb9bc8c1e7918f94eda6d/greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8", size = 1611228, upload-time = "2025-11-04T12:42:08.423Z" },
{ url = "https://files.pythonhosted.org/packages/d6/6f/b60b0291d9623c496638c582297ead61f43c4b72eef5e9c926ef4565ec13/greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c", size = 298654, upload-time = "2025-08-07T13:50:00.469Z" }, { url = "https://files.pythonhosted.org/packages/d6/6f/b60b0291d9623c496638c582297ead61f43c4b72eef5e9c926ef4565ec13/greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c", size = 298654, upload-time = "2025-08-07T13:50:00.469Z" },
{ url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" },
{ url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" },
@@ -2233,6 +2235,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" },
{ url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" },
{ url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" }, { url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" },
{ url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" },
{ url = "https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" },
{ url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" }, { url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" },
{ url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" },
{ url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" },
@@ -2242,6 +2246,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" },
{ url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" },
{ url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" },
{ url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" },
{ url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" },
{ url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" },
{ url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" },
{ url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" },
@@ -2251,6 +2257,8 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" },
{ url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" },
{ url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" },
{ url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" },
{ url = "https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" },
{ url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" },
] ]
@@ -5970,6 +5978,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/48/7c/42f0b6997324023e94939f8f32b9a8dd928499f4b5d7b4412905368686b5/pymongo-4.15.3-cp313-cp313-win_arm64.whl", hash = "sha256:fb384623ece34db78d445dd578a52d28b74e8319f4d9535fbaff79d0eae82b3d", size = 944300, upload-time = "2025-10-07T21:56:58.969Z" }, { url = "https://files.pythonhosted.org/packages/48/7c/42f0b6997324023e94939f8f32b9a8dd928499f4b5d7b4412905368686b5/pymongo-4.15.3-cp313-cp313-win_arm64.whl", hash = "sha256:fb384623ece34db78d445dd578a52d28b74e8319f4d9535fbaff79d0eae82b3d", size = 944300, upload-time = "2025-10-07T21:56:58.969Z" },
] ]
[[package]]
name = "pymupdf"
version = "1.26.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ec/d7/a6f0e03a117fa2ad79c4b898203bb212b17804f92558a6a339298faca7bb/pymupdf-1.26.6.tar.gz", hash = "sha256:a2b4531cd4ab36d6f1f794bb6d3c33b49bda22f36d58bb1f3e81cbc10183bd2b", size = 84322494, upload-time = "2025-11-05T15:20:46.786Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/5c/dec354eee5fe4966c715f33818ed4193e0e6c986cf8484de35b6c167fb8e/pymupdf-1.26.6-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:e46f320a136ad55e5219e8f0f4061bdf3e4c12b126d2740d5a49f73fae7ea176", size = 23178988, upload-time = "2025-11-05T14:31:19.834Z" },
{ url = "https://files.pythonhosted.org/packages/ec/a0/11adb742d18142bd623556cd3b5d64649816decc5eafd30efc9498657e76/pymupdf-1.26.6-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:6844cd2396553c0fa06de4869d5d5ecb1260e6fc3b9d85abe8fa35f14dd9d688", size = 22469764, upload-time = "2025-11-05T14:32:34.654Z" },
{ url = "https://files.pythonhosted.org/packages/e4/c8/377cf20e31f58d4c243bfcf2d3cb7466d5b97003b10b9f1161f11eb4a994/pymupdf-1.26.6-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:617ba69e02c44f0da1c0e039ea4a26cf630849fd570e169c71daeb8ac52a81d6", size = 23502227, upload-time = "2025-11-06T11:03:56.934Z" },
{ url = "https://files.pythonhosted.org/packages/4f/bf/6e02e3d84b32c137c71a0a3dcdba8f2f6e9950619a3bc272245c7c06a051/pymupdf-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:7777d0b7124c2ebc94849536b6a1fb85d158df3b9d873935e63036559391534c", size = 24115381, upload-time = "2025-11-05T14:33:54.338Z" },
{ url = "https://files.pythonhosted.org/packages/ab/9d/30f7fcb3776bfedde66c06297960debe4883b1667294a1ee9426c942e94d/pymupdf-1.26.6-cp310-abi3-win32.whl", hash = "sha256:8f3ef05befc90ca6bb0f12983200a7048d5bff3e1c1edef1bb3de60b32cb5274", size = 17203613, upload-time = "2025-11-05T17:19:47.494Z" },
{ url = "https://files.pythonhosted.org/packages/f9/e8/989f4eaa369c7166dc24f0eaa3023f13788c40ff1b96701f7047421554a8/pymupdf-1.26.6-cp310-abi3-win_amd64.whl", hash = "sha256:ce02ca96ed0d1acfd00331a4d41a34c98584d034155b06fd4ec0f051718de7ba", size = 18405680, upload-time = "2025-11-05T14:34:48.672Z" },
]
[[package]] [[package]]
name = "pymysql" name = "pymysql"
version = "1.1.2" version = "1.1.2"