feat: add format hints to audio/video duration detection

This commit is contained in:
Greyson LaLonde
2026-01-22 14:02:55 -05:00
parent 9fec81f976
commit 9be88e05ee
11 changed files with 936 additions and 43 deletions

View File

@@ -104,6 +104,8 @@ file-processing = [
"python-magic>=0.4.27",
"aiocache~=0.12.3",
"aiofiles~=24.1.0",
"tinytag~=1.10.0",
"av~=13.0.0",
]

View File

@@ -32,6 +32,7 @@ from crewai.files.file import (
FileSource,
FileSourceInput,
FileStream,
FileUrl,
RawFileInput,
)
from crewai.files.processing import (
@@ -103,6 +104,7 @@ __all__ = [
"FileStream",
"FileTooLargeError",
"FileUploader",
"FileUrl",
"FileValidationError",
"ImageConstraints",
"ImageExtension",

View File

@@ -16,6 +16,7 @@ from crewai.files.file import (
FilePath,
FileSource,
FileStream,
FileUrl,
)
from crewai.files.utils import is_file_source
@@ -29,12 +30,14 @@ class _FileSourceCoercer:
@classmethod
def _coerce(cls, v: Any) -> FileSource:
"""Convert raw input to appropriate FileSource type."""
if isinstance(v, (FilePath, FileBytes, FileStream)):
if isinstance(v, (FilePath, FileBytes, FileStream, FileUrl)):
return v
if isinstance(v, str):
if v.startswith(("http://", "https://")):
return FileUrl(url=v)
return FilePath(path=Path(v))
if isinstance(v, Path):
return FilePath(path=v)
if isinstance(v, str):
return FilePath(path=Path(v))
if isinstance(v, bytes):
return FileBytes(data=v)
if isinstance(v, (IOBase, BinaryIO)):
@@ -203,7 +206,7 @@ class BaseFile(ABC, BaseModel):
TypeError: If the underlying source doesn't support async read.
"""
source = self._file_source
if isinstance(source, (FilePath, FileBytes, AsyncFileStream)):
if isinstance(source, (FilePath, FileBytes, AsyncFileStream, FileUrl)):
return await source.aread()
raise TypeError(f"{type(source).__name__} does not support async read")

View File

@@ -414,17 +414,84 @@ class AsyncFileStream(BaseModel):
yield chunk
FileSource = FilePath | FileBytes | FileStream | AsyncFileStream
class FileUrl(BaseModel):
    """File referenced by URL.

    For providers that support URL references, the URL is passed directly.
    For providers that don't, content is fetched on demand and cached on the
    instance, so repeated reads issue at most one successful request.

    Attributes:
        url: URL where the file can be accessed (``http://`` or ``https://``).
        filename: Optional filename. It is not derived from the URL; it stays
            ``None`` unless the caller supplies one.
    """

    url: str = Field(description="URL where the file can be accessed.")
    filename: str | None = Field(default=None, description="Optional filename.")
    _content_type: str | None = PrivateAttr(default=None)
    _content: bytes | None = PrivateAttr(default=None)

    @model_validator(mode="after")
    def _validate_url(self) -> FileUrl:
        """Reject URLs that do not use the http or https scheme.

        Raises:
            ValueError: If the URL does not start with ``http://`` or
                ``https://``.
        """
        if not self.url.startswith(("http://", "https://")):
            raise ValueError(f"Invalid URL scheme: {self.url}")
        return self

    @property
    def content_type(self) -> str:
        """Get the content type, guessing from URL extension if not set."""
        if self._content_type is None:
            self._content_type = self._guess_content_type()
        return self._content_type

    def _guess_content_type(self) -> str:
        """Guess content type from the extension of the URL path.

        Only the path component is inspected, so query strings and fragments
        do not affect the guess. Falls back to ``application/octet-stream``.
        """
        from urllib.parse import urlparse

        guessed, _ = mimetypes.guess_type(urlparse(self.url).path)
        return guessed or "application/octet-stream"

    def _store_response(self, response: Any) -> bytes:
        """Validate a fetched response and cache its body and media type.

        Shared by ``read`` and ``aread`` so both paths treat the response
        identically.

        Args:
            response: The HTTP response returned by httpx.

        Returns:
            The response body.

        Raises:
            httpx.HTTPStatusError: If the response has an error status.
        """
        response.raise_for_status()
        body: bytes = response.content
        self._content = body
        header = response.headers.get("content-type")
        if header:
            # Drop parameters such as "; charset=utf-8" and surrounding
            # whitespace. A blank media type is ignored so the URL-extension
            # guess remains available through ``content_type``.
            media_type = header.split(";", 1)[0].strip()
            if media_type:
                self._content_type = media_type
        return body

    def read(self) -> bytes:
        """Fetch content from URL (for providers that don't support URL references).

        Returns:
            The file content; served from the instance cache after the first
            successful fetch.
        """
        cached = self._content
        if cached is not None:
            return cached

        import httpx

        return self._store_response(httpx.get(self.url, follow_redirects=True))

    async def aread(self) -> bytes:
        """Async fetch content from URL.

        Returns:
            The file content; served from the instance cache after the first
            successful fetch.
        """
        cached = self._content
        if cached is not None:
            return cached

        import httpx

        async with httpx.AsyncClient() as client:
            response = await client.get(self.url, follow_redirects=True)
            return self._store_response(response)
FileSource = FilePath | FileBytes | FileStream | AsyncFileStream | FileUrl
def _normalize_source(value: Any) -> FileSource:
"""Convert raw input to appropriate source type."""
if isinstance(value, (FilePath, FileBytes, FileStream, AsyncFileStream)):
if isinstance(value, (FilePath, FileBytes, FileStream, AsyncFileStream, FileUrl)):
return value
if isinstance(value, str):
if value.startswith(("http://", "https://")):
return FileUrl(url=value)
return FilePath(path=Path(value))
if isinstance(value, Path):
return FilePath(path=value)
if isinstance(value, str):
return FilePath(path=Path(value))
if isinstance(value, bytes):
return FileBytes(data=value)
if isinstance(value, AsyncReadable):

View File

@@ -148,6 +148,7 @@ class ProviderConstraints:
general_max_size_bytes: Maximum size for any file type.
supports_file_upload: Whether the provider supports file upload APIs.
file_upload_threshold_bytes: Size threshold above which to use file upload.
supports_url_references: Whether the provider supports URL-based file references.
"""
name: ProviderName
@@ -158,21 +159,24 @@ class ProviderConstraints:
general_max_size_bytes: int | None = None
supports_file_upload: bool = False
file_upload_threshold_bytes: int | None = None
supports_url_references: bool = False
ANTHROPIC_CONSTRAINTS = ProviderConstraints(
name="anthropic",
image=ImageConstraints(
max_size_bytes=5_242_880,
max_size_bytes=5_242_880, # 5 MB per image
max_width=8000,
max_height=8000,
max_images_per_request=100,
),
pdf=PDFConstraints(
max_size_bytes=31_457_280,
max_size_bytes=33_554_432, # 32 MB request size limit
max_pages=100,
),
supports_file_upload=True,
file_upload_threshold_bytes=5_242_880,
supports_url_references=True,
)
OPENAI_CONSTRAINTS = ProviderConstraints(
@@ -181,8 +185,13 @@ OPENAI_CONSTRAINTS = ProviderConstraints(
max_size_bytes=20_971_520,
max_images_per_request=10,
),
audio=AudioConstraints(
max_size_bytes=26_214_400, # 25 MB - whisper limit
max_duration_seconds=1500, # 25 minutes, arbitrary-ish, this is from the transcriptions limit
),
supports_file_upload=True,
file_upload_threshold_bytes=5_242_880,
supports_url_references=True,
)
GEMINI_CONSTRAINTS = ProviderConstraints(
@@ -196,14 +205,17 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
),
audio=AudioConstraints(
max_size_bytes=104_857_600,
max_duration_seconds=34200, # 9.5 hours
supported_formats=GEMINI_AUDIO_FORMATS,
),
video=VideoConstraints(
max_size_bytes=2_147_483_648,
max_duration_seconds=3600, # 1 hour at default resolution
supported_formats=GEMINI_VIDEO_FORMATS,
),
supports_file_upload=True,
file_upload_threshold_bytes=20_971_520,
supports_url_references=True,
)
BEDROCK_CONSTRAINTS = ProviderConstraints(
@@ -225,6 +237,11 @@ AZURE_CONSTRAINTS = ProviderConstraints(
max_size_bytes=20_971_520,
max_images_per_request=10,
),
audio=AudioConstraints(
max_size_bytes=26_214_400, # 25 MB - same as openai
max_duration_seconds=1500, # 25 minutes - same as openai
),
supports_url_references=True,
)

View File

@@ -43,7 +43,7 @@ def _get_image_dimensions(content: bytes) -> tuple[int, int] | None:
with Image.open(io.BytesIO(content)) as img:
width, height = img.size
return (int(width), int(height))
return int(width), int(height)
except ImportError:
logger.warning(
"Pillow not installed - cannot validate image dimensions. "
@@ -74,6 +74,81 @@ def _get_pdf_page_count(content: bytes) -> int | None:
return None
def _get_audio_duration(content: bytes, filename: str | None = None) -> float | None:
    """Read the duration of an audio payload with tinytag, when installed.

    Args:
        content: Raw audio bytes.
        filename: Optional filename forwarded to tinytag as a format hint.

    Returns:
        Duration in seconds, or None when tinytag is not installed or the
        duration could not be read from the data.
    """
    try:
        from tinytag import TinyTag  # type: ignore[import-untyped]
    except ImportError:
        logger.warning(
            "tinytag not installed - cannot validate audio duration. "
            "Install with: pip install tinytag"
        )
        return None

    # Best effort: any failure while parsing is logged and reported as
    # "unknown" (None) rather than propagated to the caller.
    try:
        parsed = TinyTag.get(file_obj=io.BytesIO(content), filename=filename)
        seconds: float | None = parsed.duration
    except Exception as e:
        logger.debug(f"Could not determine audio duration: {e}")
        return None
    else:
        return seconds
# Maps a video MIME type to the container format name that
# _get_video_duration passes to av.open() as its `format` hint.
# MIME types missing from this table yield no hint (format=None).
_VIDEO_FORMAT_MAP: dict[str, str] = {
"video/mp4": "mp4",
"video/webm": "webm",
"video/x-matroska": "matroska",
"video/quicktime": "mov",
"video/x-msvideo": "avi",
"video/x-flv": "flv",
}
def _get_video_duration(
    content: bytes, content_type: str | None = None
) -> float | None:
    """Get video duration in seconds using av if available.

    Args:
        content: Raw video bytes.
        content_type: Optional MIME type for format detection hint. Parameters
            (e.g. ``; codecs=...``) and letter case are ignored when looking
            up the hint.

    Returns:
        Duration in seconds, or None if av is unavailable, the container
        reports no duration, or the data could not be opened.
    """
    try:
        import av
    except ImportError:
        logger.warning(
            "av (PyAV) not installed - cannot validate video duration. "
            "Install with: pip install av"
        )
        return None

    format_hint: str | None = None
    if content_type:
        # Normalise "Video/MP4; codecs=avc1" -> "video/mp4" so a decorated or
        # differently-cased MIME type still resolves to its format hint.
        media_type = content_type.split(";", 1)[0].strip().lower()
        format_hint = _VIDEO_FORMAT_MAP.get(media_type)

    # Best effort: any failure while opening/probing is logged and reported
    # as "unknown" (None) rather than propagated to the caller.
    try:
        container = av.open(io.BytesIO(content), format=format_hint)  # type: ignore[attr-defined]
        try:
            duration = getattr(container, "duration", None)
            if duration is None:
                return None
            # PyAV reports container duration in microseconds (av.time_base).
            return float(duration) / 1_000_000
        finally:
            container.close()
    except Exception as e:
        logger.debug(f"Could not determine video duration: {e}")
        return None
def _format_size(size_bytes: int) -> str:
"""Format byte size to human-readable string."""
if size_bytes >= 1024 * 1024 * 1024:
@@ -273,14 +348,17 @@ def validate_audio(
Raises:
FileTooLargeError: If the file exceeds size limits.
FileValidationError: If the file exceeds duration limits.
UnsupportedFileTypeError: If the format is not supported.
"""
errors: list[str] = []
file_size = len(file.read())
content = file.read()
file_size = len(content)
filename = file.filename
_validate_size(
"Audio",
file.filename,
filename,
file_size,
constraints.max_size_bytes,
errors,
@@ -288,13 +366,24 @@ def validate_audio(
)
_validate_format(
"Audio",
file.filename,
filename,
file.content_type,
constraints.supported_formats,
errors,
raise_on_error,
)
if constraints.max_duration_seconds is not None:
duration = _get_audio_duration(content, filename)
if duration is not None and duration > constraints.max_duration_seconds:
msg = (
f"Audio '{filename}' duration ({duration:.1f}s) exceeds "
f"maximum ({constraints.max_duration_seconds}s)"
)
errors.append(msg)
if raise_on_error:
raise FileValidationError(msg, file_name=filename)
return errors
@@ -316,14 +405,17 @@ def validate_video(
Raises:
FileTooLargeError: If the file exceeds size limits.
FileValidationError: If the file exceeds duration limits.
UnsupportedFileTypeError: If the format is not supported.
"""
errors: list[str] = []
file_size = len(file.read())
content = file.read()
file_size = len(content)
filename = file.filename
_validate_size(
"Video",
file.filename,
filename,
file_size,
constraints.max_size_bytes,
errors,
@@ -331,13 +423,24 @@ def validate_video(
)
_validate_format(
"Video",
file.filename,
filename,
file.content_type,
constraints.supported_formats,
errors,
raise_on_error,
)
if constraints.max_duration_seconds is not None:
duration = _get_video_duration(content, file.content_type)
if duration is not None and duration > constraints.max_duration_seconds:
msg = (
f"Video '{filename}' duration ({duration:.1f}s) exceeds "
f"maximum ({constraints.max_duration_seconds}s)"
)
errors.append(msg)
if raise_on_error:
raise FileValidationError(msg, file_name=filename)
return errors

View File

@@ -8,6 +8,7 @@ import logging
from crewai.files.constants import UPLOAD_MAX_RETRIES, UPLOAD_RETRY_DELAY_BASE
from crewai.files.content_types import FileInput
from crewai.files.file import FileUrl
from crewai.files.metrics import measure_operation
from crewai.files.processing.constraints import (
AudioConstraints,
@@ -22,10 +23,12 @@ from crewai.files.resolved import (
InlineBase64,
InlineBytes,
ResolvedFile,
UrlReference,
)
from crewai.files.upload_cache import CachedUpload, UploadCache
from crewai.files.uploaders import UploadResult, get_uploader
from crewai.files.uploaders.base import FileUploader
from crewai.files.uploaders.factory import ProviderType
logger = logging.getLogger(__name__)
@@ -102,7 +105,49 @@ class FileResolver:
content_type=file.content_type,
)
def resolve(self, file: FileInput, provider: str) -> ResolvedFile:
@staticmethod
def _is_url_source(file: FileInput) -> bool:
    """Tell whether the file is backed by a URL source.

    Args:
        file: The file to check.

    Returns:
        True when the file's underlying source is a FileUrl, else False.
    """
    underlying = file._file_source
    return isinstance(underlying, FileUrl)
@staticmethod
def _supports_url(constraints: ProviderConstraints | None) -> bool:
    """Tell whether the provider accepts URL references.

    Args:
        constraints: Provider constraints, or None when none are available.

    Returns:
        False when no constraints are given; otherwise the constraints'
        ``supports_url_references`` flag.
    """
    if constraints is None:
        return False
    return constraints.supports_url_references
@staticmethod
def _resolve_as_url(file: FileInput) -> UrlReference:
    """Build a UrlReference from a URL-backed file.

    Args:
        file: The file with URL source.

    Returns:
        UrlReference carrying the file's content type and the source URL.

    Raises:
        TypeError: If the file's underlying source is not a FileUrl.
    """
    url_source = file._file_source
    if isinstance(url_source, FileUrl):
        return UrlReference(
            content_type=file.content_type,
            url=url_source.url,
        )
    raise TypeError(f"Expected FileUrl source, got {type(url_source).__name__}")
def resolve(self, file: FileInput, provider: ProviderType) -> ResolvedFile:
"""Resolve a file to its delivery format for a provider.
Args:
@@ -112,25 +157,26 @@ class FileResolver:
Returns:
ResolvedFile representing the appropriate delivery format.
"""
provider_lower = provider.lower()
constraints = get_constraints_for_provider(provider)
if self._is_url_source(file) and self._supports_url(constraints):
return self._resolve_as_url(file)
context = self._build_file_context(file)
should_upload = self._should_upload(
file, provider_lower, constraints, context.size
)
should_upload = self._should_upload(file, provider, constraints, context.size)
if should_upload:
resolved = self._resolve_via_upload(file, provider_lower, context)
resolved = self._resolve_via_upload(file, provider, context)
if resolved is not None:
return resolved
return self._resolve_inline(file, provider_lower, context)
return self._resolve_inline(file, provider, context)
def resolve_files(
self,
files: dict[str, FileInput],
provider: str,
provider: ProviderType,
) -> dict[str, ResolvedFile]:
"""Resolve multiple files for a provider.
@@ -220,7 +266,7 @@ class FileResolver:
def _resolve_via_upload(
self,
file: FileInput,
provider: str,
provider: ProviderType,
context: FileContext,
) -> ResolvedFile | None:
"""Resolve a file by uploading it.
@@ -367,7 +413,7 @@ class FileResolver:
data=encoded,
)
async def aresolve(self, file: FileInput, provider: str) -> ResolvedFile:
async def aresolve(self, file: FileInput, provider: ProviderType) -> ResolvedFile:
"""Async resolve a file to its delivery format for a provider.
Args:
@@ -377,25 +423,26 @@ class FileResolver:
Returns:
ResolvedFile representing the appropriate delivery format.
"""
provider_lower = provider.lower()
constraints = get_constraints_for_provider(provider)
if self._is_url_source(file) and self._supports_url(constraints):
return self._resolve_as_url(file)
context = self._build_file_context(file)
should_upload = self._should_upload(
file, provider_lower, constraints, context.size
)
should_upload = self._should_upload(file, provider, constraints, context.size)
if should_upload:
resolved = await self._aresolve_via_upload(file, provider_lower, context)
resolved = await self._aresolve_via_upload(file, provider, context)
if resolved is not None:
return resolved
return self._resolve_inline(file, provider_lower, context)
return self._resolve_inline(file, provider, context)
async def aresolve_files(
self,
files: dict[str, FileInput],
provider: str,
provider: ProviderType,
max_concurrency: int = 10,
) -> dict[str, ResolvedFile]:
"""Async resolve multiple files in parallel.
@@ -434,7 +481,7 @@ class FileResolver:
async def _aresolve_via_upload(
self,
file: FileInput,
provider: str,
provider: ProviderType,
context: FileContext,
) -> ResolvedFile | None:
"""Async resolve a file by uploading it.
@@ -552,7 +599,7 @@ class FileResolver:
)
return None
def _get_uploader(self, provider: str) -> FileUploader | None:
def _get_uploader(self, provider: ProviderType) -> FileUploader | None:
"""Get or create an uploader for a provider.
Args:

View File

@@ -15,9 +15,9 @@ if TYPE_CHECKING:
def is_file_source(v: object) -> TypeIs[FileSource]:
"""Type guard to narrow input to FileSource."""
from crewai.files.file import FileBytes, FilePath, FileStream
from crewai.files.file import FileBytes, FilePath, FileStream, FileUrl
return isinstance(v, (FilePath, FileBytes, FileStream))
return isinstance(v, (FilePath, FileBytes, FileStream, FileUrl))
def wrap_file_source(source: FileSource) -> FileInput:
@@ -62,7 +62,7 @@ def normalize_input_files(
Dictionary mapping names to FileInput wrappers.
"""
from crewai.files.content_types import BaseFile
from crewai.files.file import FileBytes, FilePath, FileStream
from crewai.files.file import FileBytes, FilePath, FileStream, FileUrl
result: dict[str, FileInput] = {}
@@ -74,13 +74,16 @@ def normalize_input_files(
result[name] = item
continue
file_source: FilePath | FileBytes | FileStream
if isinstance(item, (FilePath, FileBytes, FileStream)):
file_source: FilePath | FileBytes | FileStream | FileUrl
if isinstance(item, (FilePath, FileBytes, FileStream, FileUrl)):
file_source = item
elif isinstance(item, Path):
file_source = FilePath(path=item)
elif isinstance(item, str):
file_source = FilePath(path=Path(item))
if item.startswith(("http://", "https://")):
file_source = FileUrl(url=item)
else:
file_source = FilePath(path=Path(item))
elif isinstance(item, (bytes, memoryview)):
file_source = FileBytes(data=bytes(item))
else:

View File

@@ -1,13 +1,17 @@
"""Tests for file validators."""
from unittest.mock import patch
import pytest
from crewai.files import FileBytes, ImageFile, PDFFile, TextFile
from crewai.files import AudioFile, FileBytes, ImageFile, PDFFile, TextFile, VideoFile
from crewai.files.processing.constraints import (
ANTHROPIC_CONSTRAINTS,
AudioConstraints,
ImageConstraints,
PDFConstraints,
ProviderConstraints,
VideoConstraints,
)
from crewai.files.processing.exceptions import (
FileTooLargeError,
@@ -15,10 +19,14 @@ from crewai.files.processing.exceptions import (
UnsupportedFileTypeError,
)
from crewai.files.processing.validators import (
_get_audio_duration,
_get_video_duration,
validate_audio,
validate_file,
validate_image,
validate_pdf,
validate_text,
validate_video,
)
@@ -206,3 +214,281 @@ class TestValidateFile:
validate_file(file, constraints)
assert "does not support PDFs" in str(exc_info.value)
# Minimal audio bytes for testing (not a valid audio file, used for mocked tests)
MINIMAL_AUDIO = b"\x00" * 100
# Minimal video bytes for testing (not a valid video file, used for mocked tests)
MINIMAL_VIDEO = b"\x00" * 100
# Fallback content type when python-magic cannot detect
FALLBACK_CONTENT_TYPE = "application/octet-stream"
class TestValidateAudio:
    """Exercise validate_audio: size, format and duration checks."""

    _TEN_MIB = 10 * 1024 * 1024
    _MP3_FORMATS = ("audio/mp3", "audio/mpeg", FALLBACK_CONTENT_TYPE)

    @staticmethod
    def _mp3_file() -> AudioFile:
        """Wrap the placeholder audio bytes in an AudioFile named test.mp3."""
        return AudioFile(source=FileBytes(data=MINIMAL_AUDIO, filename="test.mp3"))

    @classmethod
    def _limits(cls, **overrides) -> AudioConstraints:
        """Build permissive audio constraints, with per-test overrides."""
        params = {
            "max_size_bytes": cls._TEN_MIB,
            "supported_formats": cls._MP3_FORMATS,
        }
        params.update(overrides)
        return AudioConstraints(**params)

    def test_validate_valid_audio(self):
        """An audio file inside every limit produces no errors."""
        errors = validate_audio(
            self._mp3_file(), self._limits(), raise_on_error=False
        )

        assert errors == []

    def test_validate_audio_too_large(self):
        """An audio file above the size limit raises FileTooLargeError."""
        tiny_limit = self._limits(max_size_bytes=10)  # Very small limit

        with pytest.raises(FileTooLargeError) as exc_info:
            validate_audio(self._mp3_file(), tiny_limit)

        assert "exceeds" in str(exc_info.value)
        assert exc_info.value.file_name == "test.mp3"

    def test_validate_audio_unsupported_format(self):
        """An audio file outside the supported formats is rejected."""
        wav_only = self._limits(supported_formats=("audio/wav",))  # Only WAV

        with pytest.raises(UnsupportedFileTypeError) as exc_info:
            validate_audio(self._mp3_file(), wav_only)

        assert "not supported" in str(exc_info.value)

    @patch("crewai.files.processing.validators._get_audio_duration")
    def test_validate_audio_duration_passes(self, mock_get_duration):
        """A duration below the limit passes validation."""
        mock_get_duration.return_value = 30.0

        errors = validate_audio(
            self._mp3_file(),
            self._limits(max_duration_seconds=60),
            raise_on_error=False,
        )

        assert errors == []
        mock_get_duration.assert_called_once()

    @patch("crewai.files.processing.validators._get_audio_duration")
    def test_validate_audio_duration_fails(self, mock_get_duration):
        """A duration above the limit raises FileValidationError."""
        mock_get_duration.return_value = 120.5

        with pytest.raises(FileValidationError) as exc_info:
            validate_audio(self._mp3_file(), self._limits(max_duration_seconds=60))

        message = str(exc_info.value)
        assert "duration" in message.lower()
        assert "120.5s" in message
        assert "60s" in message

    @patch("crewai.files.processing.validators._get_audio_duration")
    def test_validate_audio_duration_no_raise(self, mock_get_duration):
        """With raise_on_error=False the duration error is returned instead."""
        mock_get_duration.return_value = 120.5

        errors = validate_audio(
            self._mp3_file(),
            self._limits(max_duration_seconds=60),
            raise_on_error=False,
        )

        assert len(errors) == 1
        assert "duration" in errors[0].lower()

    @patch("crewai.files.processing.validators._get_audio_duration")
    def test_validate_audio_duration_none_skips(self, mock_get_duration):
        """No duration probe happens when max_duration_seconds is None."""
        errors = validate_audio(
            self._mp3_file(),
            self._limits(max_duration_seconds=None),
            raise_on_error=False,
        )

        assert errors == []
        mock_get_duration.assert_not_called()

    @patch("crewai.files.processing.validators._get_audio_duration")
    def test_validate_audio_duration_detection_returns_none(self, mock_get_duration):
        """An undetectable duration (None) does not fail validation."""
        mock_get_duration.return_value = None

        errors = validate_audio(
            self._mp3_file(),
            self._limits(max_duration_seconds=60),
            raise_on_error=False,
        )

        assert errors == []
class TestValidateVideo:
    """Exercise validate_video: size, format and duration checks."""

    _TEN_MIB = 10 * 1024 * 1024
    _MP4_FORMATS = ("video/mp4", FALLBACK_CONTENT_TYPE)

    @staticmethod
    def _mp4_file() -> VideoFile:
        """Wrap the placeholder video bytes in a VideoFile named test.mp4."""
        return VideoFile(source=FileBytes(data=MINIMAL_VIDEO, filename="test.mp4"))

    @classmethod
    def _limits(cls, **overrides) -> VideoConstraints:
        """Build permissive video constraints, with per-test overrides."""
        params = {
            "max_size_bytes": cls._TEN_MIB,
            "supported_formats": cls._MP4_FORMATS,
        }
        params.update(overrides)
        return VideoConstraints(**params)

    def test_validate_valid_video(self):
        """A video file inside every limit produces no errors."""
        errors = validate_video(
            self._mp4_file(), self._limits(), raise_on_error=False
        )

        assert errors == []

    def test_validate_video_too_large(self):
        """A video file above the size limit raises FileTooLargeError."""
        tiny_limit = self._limits(max_size_bytes=10)  # Very small limit

        with pytest.raises(FileTooLargeError) as exc_info:
            validate_video(self._mp4_file(), tiny_limit)

        assert "exceeds" in str(exc_info.value)
        assert exc_info.value.file_name == "test.mp4"

    def test_validate_video_unsupported_format(self):
        """A video file outside the supported formats is rejected."""
        webm_only = self._limits(supported_formats=("video/webm",))  # Only WebM

        with pytest.raises(UnsupportedFileTypeError) as exc_info:
            validate_video(self._mp4_file(), webm_only)

        assert "not supported" in str(exc_info.value)

    @patch("crewai.files.processing.validators._get_video_duration")
    def test_validate_video_duration_passes(self, mock_get_duration):
        """A duration below the limit passes validation."""
        mock_get_duration.return_value = 30.0

        errors = validate_video(
            self._mp4_file(),
            self._limits(max_duration_seconds=60),
            raise_on_error=False,
        )

        assert errors == []
        mock_get_duration.assert_called_once()

    @patch("crewai.files.processing.validators._get_video_duration")
    def test_validate_video_duration_fails(self, mock_get_duration):
        """A duration above the limit raises FileValidationError."""
        mock_get_duration.return_value = 180.0

        with pytest.raises(FileValidationError) as exc_info:
            validate_video(self._mp4_file(), self._limits(max_duration_seconds=60))

        message = str(exc_info.value)
        assert "duration" in message.lower()
        assert "180.0s" in message
        assert "60s" in message

    @patch("crewai.files.processing.validators._get_video_duration")
    def test_validate_video_duration_no_raise(self, mock_get_duration):
        """With raise_on_error=False the duration error is returned instead."""
        mock_get_duration.return_value = 180.0

        errors = validate_video(
            self._mp4_file(),
            self._limits(max_duration_seconds=60),
            raise_on_error=False,
        )

        assert len(errors) == 1
        assert "duration" in errors[0].lower()

    @patch("crewai.files.processing.validators._get_video_duration")
    def test_validate_video_duration_none_skips(self, mock_get_duration):
        """No duration probe happens when max_duration_seconds is None."""
        errors = validate_video(
            self._mp4_file(),
            self._limits(max_duration_seconds=None),
            raise_on_error=False,
        )

        assert errors == []
        mock_get_duration.assert_not_called()

    @patch("crewai.files.processing.validators._get_video_duration")
    def test_validate_video_duration_detection_returns_none(self, mock_get_duration):
        """An undetectable duration (None) does not fail validation."""
        mock_get_duration.return_value = None

        errors = validate_video(
            self._mp4_file(),
            self._limits(max_duration_seconds=60),
            raise_on_error=False,
        )

        assert errors == []
class TestGetAudioDuration:
    """Direct checks of the _get_audio_duration helper."""

    def test_get_audio_duration_corrupt_file(self):
        """Bytes that are not audio yield None instead of an exception."""
        garbage = b"not valid audio data at all"

        assert _get_audio_duration(garbage) is None
class TestGetVideoDuration:
    """Direct checks of the _get_video_duration helper."""

    def test_get_video_duration_corrupt_file(self):
        """Bytes that are not video yield None instead of an exception."""
        garbage = b"not valid video data at all"

        assert _get_video_duration(garbage) is None

View File

@@ -0,0 +1,312 @@
"""Tests for FileUrl source type and URL resolution."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from crewai.files import FileBytes, FileUrl, ImageFile
from crewai.files.file import _normalize_source, FilePath
from crewai.files.resolved import InlineBase64, UrlReference
from crewai.files.resolver import FileResolver
class TestFileUrl:
"""Tests for FileUrl source type."""
def test_create_file_url(self):
"""Test creating FileUrl with valid URL."""
url = FileUrl(url="https://example.com/image.png")
assert url.url == "https://example.com/image.png"
assert url.filename is None
def test_create_file_url_with_filename(self):
"""Test creating FileUrl with custom filename."""
url = FileUrl(url="https://example.com/image.png", filename="custom.png")
assert url.url == "https://example.com/image.png"
assert url.filename == "custom.png"
def test_invalid_url_scheme_raises(self):
"""Test that non-http(s) URLs raise ValueError."""
with pytest.raises(ValueError, match="Invalid URL scheme"):
FileUrl(url="ftp://example.com/file.txt")
def test_invalid_url_scheme_file_raises(self):
"""Test that file:// URLs raise ValueError."""
with pytest.raises(ValueError, match="Invalid URL scheme"):
FileUrl(url="file:///path/to/file.txt")
def test_http_url_valid(self):
"""Test that HTTP URLs are valid."""
url = FileUrl(url="http://example.com/image.jpg")
assert url.url == "http://example.com/image.jpg"
def test_https_url_valid(self):
"""Test that HTTPS URLs are valid."""
url = FileUrl(url="https://example.com/image.jpg")
assert url.url == "https://example.com/image.jpg"
def test_content_type_guessing_png(self):
"""Test content type guessing for PNG files."""
url = FileUrl(url="https://example.com/image.png")
assert url.content_type == "image/png"
def test_content_type_guessing_jpeg(self):
"""Test content type guessing for JPEG files."""
url = FileUrl(url="https://example.com/photo.jpg")
assert url.content_type == "image/jpeg"
def test_content_type_guessing_pdf(self):
"""Test content type guessing for PDF files."""
url = FileUrl(url="https://example.com/document.pdf")
assert url.content_type == "application/pdf"
def test_content_type_guessing_with_query_params(self):
"""Test content type guessing with URL query parameters."""
url = FileUrl(url="https://example.com/image.png?v=123&token=abc")
assert url.content_type == "image/png"
def test_content_type_fallback_unknown(self):
"""Test content type falls back to octet-stream for unknown extensions."""
url = FileUrl(url="https://example.com/file.unknownext123")
assert url.content_type == "application/octet-stream"
def test_content_type_no_extension(self):
"""Test content type for URL without extension."""
url = FileUrl(url="https://example.com/file")
assert url.content_type == "application/octet-stream"
def test_read_fetches_content(self):
"""Test that read() fetches content from URL."""
url = FileUrl(url="https://example.com/image.png")
mock_response = MagicMock()
mock_response.content = b"fake image content"
mock_response.headers = {"content-type": "image/png"}
with patch("httpx.get", return_value=mock_response) as mock_get:
content = url.read()
mock_get.assert_called_once_with(
"https://example.com/image.png", follow_redirects=True
)
assert content == b"fake image content"
def test_read_caches_content(self):
"""Test that read() caches content."""
url = FileUrl(url="https://example.com/image.png")
mock_response = MagicMock()
mock_response.content = b"fake content"
mock_response.headers = {}
with patch("httpx.get", return_value=mock_response) as mock_get:
content1 = url.read()
content2 = url.read()
mock_get.assert_called_once()
assert content1 == content2
def test_read_updates_content_type_from_response(self):
    """After read(), content_type should be the header's media type without parameters."""
    file_url = FileUrl(url="https://example.com/file")
    fake_response = MagicMock(
        content=b"fake content",
        headers={"content-type": "image/webp; charset=utf-8"},
    )
    with patch("httpx.get", return_value=fake_response):
        file_url.read()
    reported = file_url.content_type
    assert reported == "image/webp"
@pytest.mark.asyncio
async def test_aread_fetches_content(self):
    """Test that aread() fetches content from URL asynchronously.

    ``httpx.AsyncClient`` is patched with a mock client whose ``get``
    returns a canned response; the test checks both that a request was
    made through that client and that its body is returned.
    """
    url = FileUrl(url="https://example.com/image.png")
    mock_response = MagicMock()
    mock_response.content = b"async fake content"
    mock_response.headers = {"content-type": "image/png"}
    mock_response.raise_for_status = MagicMock()
    mock_client = MagicMock()
    mock_client.get = AsyncMock(return_value=mock_response)
    # Let the mock client act as an async context manager yielding itself.
    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
    mock_client.__aexit__ = AsyncMock(return_value=None)
    with patch("httpx.AsyncClient", return_value=mock_client):
        content = await url.aread()
    # Mirror the sync test: the content must come from an actual client request.
    mock_client.get.assert_called_once()
    assert content == b"async fake content"
@pytest.mark.asyncio
async def test_aread_caches_content(self):
    """Two consecutive aread() calls should trigger a single client GET."""
    file_url = FileUrl(url="https://example.com/image.png")
    fake_response = MagicMock(
        content=b"cached content",
        headers={},
        raise_for_status=MagicMock(),
    )
    fake_client = MagicMock(get=AsyncMock(return_value=fake_response))
    # The client mock doubles as its own async context manager.
    fake_client.__aenter__ = AsyncMock(return_value=fake_client)
    fake_client.__aexit__ = AsyncMock(return_value=None)
    with patch("httpx.AsyncClient", return_value=fake_client):
        first = await file_url.aread()
        second = await file_url.aread()
    fake_client.get.assert_called_once()
    assert first == second
class TestNormalizeSource:
    """Tests for _normalize_source with URL detection."""

    def test_normalize_url_string(self):
        """Test that URL strings are converted to FileUrl."""
        result = _normalize_source("https://example.com/image.png")
        assert isinstance(result, FileUrl)
        assert result.url == "https://example.com/image.png"

    def test_normalize_http_url_string(self):
        """Test that HTTP URL strings are converted to FileUrl."""
        result = _normalize_source("http://example.com/file.pdf")
        assert isinstance(result, FileUrl)
        assert result.url == "http://example.com/file.pdf"

    def test_normalize_file_path_string(self, tmp_path):
        """Test that file path strings are converted to FilePath."""
        test_file = tmp_path / "test.png"
        test_file.write_bytes(b"test content")
        result = _normalize_source(str(test_file))
        assert isinstance(result, FilePath)

    def test_normalize_relative_path_is_not_url(self, tmp_path, monkeypatch):
        """Test that relative path strings are not treated as URLs."""
        (tmp_path / "file.png").write_bytes(b"test content")
        # Run from tmp_path so the bare relative name points at a real file.
        monkeypatch.chdir(tmp_path)
        result = _normalize_source("file.png")
        assert isinstance(result, FilePath)
        assert not isinstance(result, FileUrl)

    def test_normalize_file_url_passthrough(self):
        """Test that FileUrl instances pass through unchanged."""
        original = FileUrl(url="https://example.com/image.png")
        result = _normalize_source(original)
        assert result is original
class TestResolverUrlHandling:
    """Tests for FileResolver URL handling."""

    def test_resolve_url_source_for_supported_provider(self):
        """Test URL source resolves to UrlReference for supported providers."""
        resolver = FileResolver()
        file = ImageFile(source=FileUrl(url="https://example.com/image.png"))
        resolved = resolver.resolve(file, "anthropic")
        assert isinstance(resolved, UrlReference)
        assert resolved.url == "https://example.com/image.png"
        assert resolved.content_type == "image/png"

    def test_resolve_url_source_openai(self):
        """Test URL source resolves to UrlReference for OpenAI."""
        resolver = FileResolver()
        file = ImageFile(source=FileUrl(url="https://example.com/photo.jpg"))
        resolved = resolver.resolve(file, "openai")
        assert isinstance(resolved, UrlReference)
        assert resolved.url == "https://example.com/photo.jpg"

    def test_resolve_url_source_gemini(self):
        """Test URL source resolves to UrlReference for Gemini."""
        resolver = FileResolver()
        file = ImageFile(source=FileUrl(url="https://example.com/image.webp"))
        resolved = resolver.resolve(file, "gemini")
        assert isinstance(resolved, UrlReference)
        assert resolved.url == "https://example.com/image.webp"

    def test_resolve_url_source_azure(self):
        """Test URL source resolves to UrlReference for Azure."""
        resolver = FileResolver()
        file = ImageFile(source=FileUrl(url="https://example.com/image.gif"))
        resolved = resolver.resolve(file, "azure")
        assert isinstance(resolved, UrlReference)
        assert resolved.url == "https://example.com/image.gif"

    def test_resolve_url_source_bedrock_fetches_content(self):
        """Test URL source fetches content for Bedrock (unsupported URLs)."""
        resolver = FileResolver()
        file_url = FileUrl(url="https://example.com/image.png")
        file = ImageFile(source=file_url)
        mock_response = MagicMock()
        # PNG signature followed by padding bytes.
        mock_response.content = b"\x89PNG\r\n\x1a\n" + b"\x00" * 50
        mock_response.headers = {"content-type": "image/png"}
        with patch("httpx.get", return_value=mock_response) as mock_get:
            resolved = resolver.resolve(file, "bedrock")
        # Confirm the content was actually downloaded rather than merely
        # checking that no URL reference came back.
        mock_get.assert_called()
        assert not isinstance(resolved, UrlReference)

    def test_resolve_bytes_source_still_works(self):
        """Test that bytes source still resolves normally."""
        resolver = FileResolver()
        minimal_png = (
            b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x08\x00\x00\x00\x08"
            b"\x01\x00\x00\x00\x00\xf9Y\xab\xcd\x00\x00\x00\nIDATx\x9cc`\x00\x00"
            b"\x00\x02\x00\x01\xe2!\xbc3\x00\x00\x00\x00IEND\xaeB`\x82"
        )
        file = ImageFile(source=FileBytes(data=minimal_png, filename="test.png"))
        resolved = resolver.resolve(file, "anthropic")
        assert isinstance(resolved, InlineBase64)

    @pytest.mark.asyncio
    async def test_aresolve_url_source(self):
        """Test async URL resolution for supported provider."""
        resolver = FileResolver()
        file = ImageFile(source=FileUrl(url="https://example.com/image.png"))
        resolved = await resolver.aresolve(file, "anthropic")
        assert isinstance(resolved, UrlReference)
        assert resolved.url == "https://example.com/image.png"
class TestImageFileWithUrl:
    """ImageFile construction when the source is a URL."""

    def test_image_file_from_url_string(self):
        """A plain URL string passed as source should end up wrapped in a FileUrl."""
        address = "https://example.com/image.png"
        image = ImageFile(source=address)
        assert isinstance(image.source, FileUrl)
        assert image.source.url == address

    def test_image_file_from_file_url(self):
        """A FileUrl passed as source is kept as the same object; type is image/jpeg."""
        photo_url = FileUrl(url="https://example.com/photo.jpg")
        image = ImageFile(source=photo_url)
        assert image.source is photo_url
        assert image.content_type == "image/jpeg"

51
uv.lock generated
View File

@@ -418,6 +418,44 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f8/aa/5082412d1ee302e9e7d80b6949bc4d2a8fa1149aaab610c5fc24709605d6/authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a", size = 243608, upload-time = "2025-10-02T13:36:07.637Z" },
]
[[package]]
name = "av"
version = "13.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e1/df/4f77aa98b998e1a19622b7a45da07884a053826e9038138d8023208e31e5/av-13.0.0.tar.gz", hash = "sha256:7fb1a5588cd8ce4d0564ddf82221f886541ea2d5152f15e63ab890430dcd3c31", size = 3884902, upload-time = "2024-09-04T08:30:48.971Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/ac/fdacc4e49b946ac9274c9363eeedceed824a71fa09df5c799cb4a137d80d/av-13.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a0f3563eb232c46811388d19eb8da3435ebd98e3b26c567da76acb878c772a4f", size = 24229400, upload-time = "2024-09-04T08:28:26.627Z" },
{ url = "https://files.pythonhosted.org/packages/55/8d/bc8670f8a2084aaf4b738017e490a5c762023b88517fd579cbaff6ab18f3/av-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52713a673ccf743cb0692c7aa9b02429d7efee3fa19281dda1167685f8c21864", size = 19446165, upload-time = "2024-09-04T08:28:30.132Z" },
{ url = "https://files.pythonhosted.org/packages/13/23/8280bc3a0df950f6fd8e57621f037d708c2065534311c7b6d88ec22e080a/av-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf667841f54cc82d5a09b9c31921dfafc22a6293aa17b9bd11f33c6c08e372d0", size = 31141668, upload-time = "2024-09-04T08:28:33.811Z" },
{ url = "https://files.pythonhosted.org/packages/72/d3/16dfe2bc810be142f06ef93b9eadfddc51309bcdb0ca80c566aa889f0dde/av-13.0.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6a3a4a572d3c70fd3d8709b9ae5d8a7cd6ef813b46d571a95477a87d0f3e282", size = 30565447, upload-time = "2024-09-04T08:28:37.579Z" },
{ url = "https://files.pythonhosted.org/packages/64/56/41f067fa8344027c03abbaeaf5826838c97404a47472c521a658f0656472/av-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ed3b70ca98c3f3ba130f23ec1393316eb714f35d41b4c1d9d1ef4951f862cc0", size = 32975707, upload-time = "2024-09-04T08:28:41.418Z" },
{ url = "https://files.pythonhosted.org/packages/23/53/182589a2501f44cde451a18c8db372fec714bd3dfdd8906277fce3b10c18/av-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:43db19eb2704a5a8b6060c070bcf05e0ce1132edb3140f8a19271ac8eac63706", size = 25747720, upload-time = "2024-09-04T08:28:44.816Z" },
{ url = "https://files.pythonhosted.org/packages/d1/b3/37460a6b94ee2a284b8d585a19cc63b32a9318b4c1eee0e25b6f24df415a/av-13.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b3ec126e5c30a0d44c6ce6cd0be72b2af83529e5b19c41e6569a7c4d00261d04", size = 24224476, upload-time = "2024-09-04T08:28:48.276Z" },
{ url = "https://files.pythonhosted.org/packages/b0/a7/1cc83b2e0aeead07c3e9c59cbddf15f2b555578c6b725cc65bdbbec4c4d6/av-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0014c16d9123f50f366e32baed5c358429ed64c701ed5cea135fba333a5c9b13", size = 19438756, upload-time = "2024-09-04T08:28:51.511Z" },
{ url = "https://files.pythonhosted.org/packages/b3/b6/d6a85b89b14d60b360fb8eab65a9e7d8119d2807dcb025bc93baeff565a6/av-13.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fa360cfc3e55ef1b22199741c74b584a57d2af75d5e5d9b54dd8cc999ae50bb", size = 32084112, upload-time = "2024-09-04T08:28:54.434Z" },
{ url = "https://files.pythonhosted.org/packages/cf/1d/3b5d4ce10de1b383a1f68dcf4f7679a34f5f6cf8aad1a0dfcfbf05c5fd7e/av-13.0.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3519e3effea342295de5f52dbcd263800db2ab1ab5e43ec6485ba1ed07c2e503", size = 31396374, upload-time = "2024-09-04T08:28:58.027Z" },
{ url = "https://files.pythonhosted.org/packages/7a/8e/c5bea32963acacbc0db7b1c6e6d5a181afee2951981b88533c771beabc53/av-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f76e0060f4aa4be0911db624039e31c973dce9f9f2d410dc817b2b88e199a74", size = 33913273, upload-time = "2024-09-04T08:29:01.251Z" },
{ url = "https://files.pythonhosted.org/packages/ce/30/1912588c0bce8baf6e490103e5c4ef1963f8bc0f0c00d82cde2b6b3793fc/av-13.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b21254571904b214fc586568ba1da62d38f00cc4f76c7eebbe14af9f8dd8a40f", size = 25750490, upload-time = "2024-09-04T08:29:04.985Z" },
{ url = "https://files.pythonhosted.org/packages/df/90/f8120cebf0b86ff70691603a6fb1ef473d1fd9c99db058d0413e9a630538/av-13.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9eaf76c3a8a40dc3424ee9360b457143699d96f6e3faffb00867fd747b821ab9", size = 24238853, upload-time = "2024-09-04T08:29:08.611Z" },
{ url = "https://files.pythonhosted.org/packages/62/7d/090813d188eebbe183acad6e0cfbd9cdeca0e7f7318a0a3bd6f44ac7d16f/av-13.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:623809f0684bf4379328ced38a25c295969997ba574ed17b99fa4ee3aa564d66", size = 19446605, upload-time = "2024-09-04T08:29:11.922Z" },
{ url = "https://files.pythonhosted.org/packages/71/ec/bdc954939463127ca38ee023061be0ac89bdf2f2de6ab23f6a1d8112d070/av-13.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dc441b3899f1eb259af17acb2e5218762dcc99a4fbd6fe4d1f4155e253728b2", size = 32317356, upload-time = "2024-09-04T08:29:15.475Z" },
{ url = "https://files.pythonhosted.org/packages/00/78/8d808f4868862b1b539ffd9af1775792f128a903f134c2dbfdb39a7799e3/av-13.0.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8b9654f9261ba123377b95fd5a9214e05ba43d7545cb41a5ae2dd5ea5fe6fbc9", size = 31666294, upload-time = "2024-09-04T08:29:18.805Z" },
{ url = "https://files.pythonhosted.org/packages/f7/fd/ee64d545a60c73795285cbe70f27e49b46c40e1ca3c8c35411b75ea310e6/av-13.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8157821b9da3814720d9b7ea45d961275dc73be8161eae7258afe2f737da5779", size = 34243366, upload-time = "2024-09-04T08:29:22.423Z" },
{ url = "https://files.pythonhosted.org/packages/c1/49/08552c5c2b838016cbba90547a0c082e9e8b700eaaf90c8eb0c11fec595e/av-13.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:736c4a9cb6ef6e5f3aa1cb12609a615f6c93bf16f36439010dc1ba160beed827", size = 25751891, upload-time = "2024-09-04T08:29:26.781Z" },
{ url = "https://files.pythonhosted.org/packages/4e/fd/08eeec9bd07129242989cb69cb45be5ff4c394af27b661d7c4428c460669/av-13.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4074615d89852dc8d7aa852b9162fe855bc2c6850e0cab74a875d4e72eefe343", size = 24197575, upload-time = "2024-09-04T08:29:30.194Z" },
{ url = "https://files.pythonhosted.org/packages/f3/0a/70d1848f325fd595f009f419e11134020aca1e0bf99c0041c0f5a767a01d/av-13.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a2df1f311610dcedbd0b08a5a419ae17076aa9cd808a6d4f0b5cb8c69d604e9f", size = 19406017, upload-time = "2024-09-04T08:29:32.951Z" },
{ url = "https://files.pythonhosted.org/packages/3f/10/2c1007829950cc1b7b17593d0d304adf008331729083af3d9b7c34e10b52/av-13.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1990d1398c25d90045c771450a64bf9aff33d8e6c89568fbbc5cc85ec6ceaa1", size = 31966860, upload-time = "2024-09-04T08:29:36.272Z" },
{ url = "https://files.pythonhosted.org/packages/1d/d7/f64af0713a669560ef33eea30c08add46916cab4ff0b26b473c14a9ff32c/av-13.0.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3303584abfcc2787a3dcf303fddcab0968329a309360c22348cc2c31e060f8d9", size = 31333914, upload-time = "2024-09-04T08:29:40.417Z" },
{ url = "https://files.pythonhosted.org/packages/c5/6c/647368ea1b60059a0a0dec3eae7c76b3aaec3e222c3cbcb54af0c2716d37/av-13.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05de5e2e6dde42d804dc41aa36102f64849fc72d0c7f9afc28406a7b240dba7a", size = 33908881, upload-time = "2024-09-04T08:29:44.161Z" },
{ url = "https://files.pythonhosted.org/packages/2a/bc/e2305f5e18eb47b5eac80e29de2fc1110898bb48131bb2a6d0d893080969/av-13.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:f9cea8906abf010f6d4894c7cad52e257667d0a498d4eec7e5beb4eff519d3ff", size = 25724252, upload-time = "2024-09-04T08:29:48.344Z" },
{ url = "https://files.pythonhosted.org/packages/35/6e/1cba0d4506a3855f718615a826958b5b9f08d3b263216b8ba2fc578e54da/av-13.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d066d441efbd329947ff36604422b3a22ee65a98a78caa0869d2400cebc46381", size = 23837589, upload-time = "2024-09-04T08:30:13.345Z" },
{ url = "https://files.pythonhosted.org/packages/2a/23/8553944c6d782c4fe0883f969866f2ab1ad8546a4361c942aa80873583d5/av-13.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9836b348f648ef5a364075626e623cef39383fe439159f5875e588429c7c90ea", size = 19091589, upload-time = "2024-09-04T08:30:16.075Z" },
{ url = "https://files.pythonhosted.org/packages/0d/d4/5286b9bea8d6a87853f93116f4eef6f3d5ab64a9382371d851eb705d9299/av-13.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52aeefdaa9fd5182aee1d4ae53325756273e293173810c77960e012a9a4efda0", size = 22823448, upload-time = "2024-09-04T08:30:19.446Z" },
{ url = "https://files.pythonhosted.org/packages/27/3f/37253b9746459f570a871170d70c7c43eed58a4e755a9e1f2c67c27d6dbe/av-13.0.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aae4116c3cc94f514501f856df4a351eb3386fbc5623d3dcb17476237ffae221", size = 22673845, upload-time = "2024-09-04T08:30:22.129Z" },
{ url = "https://files.pythonhosted.org/packages/de/fa/e6995a721ce5ca9aa7e5a58dfeeb3df7c6f846f10e54ac32cbaf2948682a/av-13.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2425c8b0c8a022f10a20f3075bec05fc8efe4c5848e038d7d168cbbca089f08a", size = 24628585, upload-time = "2024-09-04T08:30:25.345Z" },
{ url = "https://files.pythonhosted.org/packages/33/b9/1023b925f6505cba49fe22a08020dd0dfb9185c42d4f26fc6217b9e1c2e2/av-13.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:894dc43623b959d00ab9a62c0357929ba7a8dd8667b37afb046caee756f9e90a", size = 25536060, upload-time = "2024-09-04T08:30:28.418Z" },
]
[[package]]
name = "azure-ai-inference"
version = "1.0.0b9"
@@ -1203,9 +1241,11 @@ embeddings = [
file-processing = [
{ name = "aiocache" },
{ name = "aiofiles" },
{ name = "av" },
{ name = "pillow" },
{ name = "pypdf" },
{ name = "python-magic" },
{ name = "tinytag" },
]
google-genai = [
{ name = "google-genai" },
@@ -1245,6 +1285,7 @@ requires-dist = [
{ name = "aiosqlite", specifier = "~=0.21.0" },
{ name = "anthropic", marker = "extra == 'anthropic'", specifier = "~=0.71.0" },
{ name = "appdirs", specifier = "~=1.4.4" },
{ name = "av", marker = "extra == 'file-processing'", specifier = "~=13.0.0" },
{ name = "azure-ai-inference", marker = "extra == 'azure-ai-inference'", specifier = "~=1.0.0b9" },
{ name = "boto3", marker = "extra == 'aws'", specifier = "~=1.40.38" },
{ name = "boto3", marker = "extra == 'bedrock'", specifier = "~=1.40.45" },
@@ -1282,6 +1323,7 @@ requires-dist = [
{ name = "qdrant-client", extras = ["fastembed"], marker = "extra == 'qdrant'", specifier = "~=1.14.3" },
{ name = "regex", specifier = "~=2024.9.11" },
{ name = "tiktoken", marker = "extra == 'embeddings'", specifier = "~=0.8.0" },
{ name = "tinytag", marker = "extra == 'file-processing'", specifier = "~=1.10.0" },
{ name = "tokenizers", specifier = "~=0.20.3" },
{ name = "tomli", specifier = "~=2.0.2" },
{ name = "tomli-w", specifier = "~=1.1.0" },
@@ -7806,6 +7848,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d6/14/fc04d491527b774ec7479897f5861959209de1480e4c4cd32ed098ff8bea/timm-1.0.22-py3-none-any.whl", hash = "sha256:888981753e65cbaacfc07494370138b1700a27b1f0af587f4f9b47bc024161d0", size = 2530238, upload-time = "2025-11-05T04:06:06.823Z" },
]
[[package]]
name = "tinytag"
version = "1.10.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/59/b5/ff5e5f9ca9677be7272260f67c87f7e8e885babc7ce94604e837dcfd8d76/tinytag-1.10.1.tar.gz", hash = "sha256:122a63b836f85094aacca43fc807aaee3290be3de17d134f5f4a08b509ae268f", size = 40906, upload-time = "2023-10-26T19:30:38.791Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2f/04/ef783cbc4aa3a5ed75969e300b3e3929daf3d1b52fe80e950c63e0d66d95/tinytag-1.10.1-py3-none-any.whl", hash = "sha256:e437654d04c966fbbbdbf807af61eb9759f1d80e4173a7d26202506b37cfdaf0", size = 37900, upload-time = "2023-10-26T19:30:36.724Z" },
]
[[package]]
name = "tokenizers"
version = "0.20.3"