mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-26 16:48:13 +00:00
refactor: fix IDE warnings and add Literal types to constraints
- Add Literal types for ImageFormat, AudioFormat, VideoFormat, ProviderName - Convert methods to @staticmethod where appropriate - Remove redundant default parameter values - Fix variable shadowing in nested functions - Make magic import optional with mimetypes fallback - Add docstrings to inner functions
This commit is contained in:
@@ -237,9 +237,12 @@ async def acleanup_uploaded_files(
|
|||||||
if delete_from_provider:
|
if delete_from_provider:
|
||||||
semaphore = asyncio.Semaphore(max_concurrency)
|
semaphore = asyncio.Semaphore(max_concurrency)
|
||||||
|
|
||||||
async def delete_one(uploader: FileUploader, upload: CachedUpload) -> bool:
|
async def delete_one(file_uploader: FileUploader, cached: CachedUpload) -> bool:
|
||||||
|
"""Delete a single file with semaphore limiting."""
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
return await _asafe_delete(uploader, upload.file_id, upload.provider)
|
return await _asafe_delete(
|
||||||
|
file_uploader, cached.file_id, cached.provider
|
||||||
|
)
|
||||||
|
|
||||||
tasks: list[asyncio.Task[bool]] = []
|
tasks: list[asyncio.Task[bool]] = []
|
||||||
for provider, uploads in provider_uploads.items():
|
for provider, uploads in provider_uploads.items():
|
||||||
@@ -251,7 +254,7 @@ async def acleanup_uploaded_files(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
tasks.extend(
|
tasks.extend(
|
||||||
asyncio.create_task(delete_one(uploader, upload)) for upload in uploads
|
asyncio.create_task(delete_one(uploader, cached)) for cached in uploads
|
||||||
)
|
)
|
||||||
|
|
||||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
@@ -291,19 +294,20 @@ async def acleanup_expired_files(
|
|||||||
if delete_from_provider and expired_entries:
|
if delete_from_provider and expired_entries:
|
||||||
semaphore = asyncio.Semaphore(max_concurrency)
|
semaphore = asyncio.Semaphore(max_concurrency)
|
||||||
|
|
||||||
async def delete_expired(upload: CachedUpload) -> None:
|
async def delete_expired(cached: CachedUpload) -> None:
|
||||||
|
"""Delete an expired file with semaphore limiting."""
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
uploader = get_uploader(upload.provider)
|
file_uploader = get_uploader(cached.provider)
|
||||||
if uploader is not None:
|
if file_uploader is not None:
|
||||||
try:
|
try:
|
||||||
await uploader.adelete(upload.file_id)
|
await file_uploader.adelete(cached.file_id)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Could not delete expired file {upload.file_id}: {e}"
|
f"Could not delete expired file {cached.file_id}: {e}"
|
||||||
)
|
)
|
||||||
|
|
||||||
await asyncio.gather(
|
await asyncio.gather(
|
||||||
*[delete_expired(upload) for upload in expired_entries],
|
*[delete_expired(cached) for cached in expired_entries],
|
||||||
return_exceptions=True,
|
return_exceptions=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -337,18 +341,19 @@ async def acleanup_provider_files(
|
|||||||
|
|
||||||
semaphore = asyncio.Semaphore(max_concurrency)
|
semaphore = asyncio.Semaphore(max_concurrency)
|
||||||
|
|
||||||
async def delete_file(file_id: str) -> bool:
|
async def delete_single(target_file_id: str) -> bool:
|
||||||
|
"""Delete a single file with semaphore limiting."""
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
return await uploader.adelete(file_id)
|
return await uploader.adelete(target_file_id)
|
||||||
|
|
||||||
if delete_all_from_provider:
|
if delete_all_from_provider:
|
||||||
try:
|
try:
|
||||||
files = uploader.list_files()
|
files = uploader.list_files()
|
||||||
tasks = []
|
tasks = []
|
||||||
for file_info in files:
|
for file_info in files:
|
||||||
file_id = file_info.get("id") or file_info.get("name")
|
fid = file_info.get("id") or file_info.get("name")
|
||||||
if file_id:
|
if fid:
|
||||||
tasks.append(delete_file(file_id))
|
tasks.append(delete_single(fid))
|
||||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
deleted = sum(1 for r in results if r is True)
|
deleted = sum(1 for r in results if r is True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -357,7 +362,7 @@ async def acleanup_provider_files(
|
|||||||
uploads = await cache.aget_all_for_provider(provider)
|
uploads = await cache.aget_all_for_provider(provider)
|
||||||
tasks = []
|
tasks = []
|
||||||
for upload in uploads:
|
for upload in uploads:
|
||||||
tasks.append(delete_file(upload.file_id))
|
tasks.append(delete_single(upload.file_id))
|
||||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
for upload, result in zip(uploads, results, strict=False):
|
for upload, result in zip(uploads, results, strict=False):
|
||||||
if result is True:
|
if result is True:
|
||||||
|
|||||||
@@ -3,11 +3,11 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from collections.abc import AsyncIterator, Iterator
|
from collections.abc import AsyncIterator, Iterator
|
||||||
|
import mimetypes
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated, Any, BinaryIO, Protocol, cast, runtime_checkable
|
from typing import Annotated, Any, BinaryIO, Protocol, cast, runtime_checkable
|
||||||
|
|
||||||
import aiofiles
|
import aiofiles
|
||||||
import magic
|
|
||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
BeforeValidator,
|
BeforeValidator,
|
||||||
@@ -52,17 +52,30 @@ ValidatedAsyncReadable = Annotated[AsyncReadable, _AsyncReadableValidator()]
|
|||||||
DEFAULT_MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024 # 500MB
|
DEFAULT_MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024 # 500MB
|
||||||
|
|
||||||
|
|
||||||
def detect_content_type(data: bytes) -> str:
|
def detect_content_type(data: bytes, filename: str | None = None) -> str:
|
||||||
"""Detect MIME type from file content.
|
"""Detect MIME type from file content.
|
||||||
|
|
||||||
|
Uses python-magic if available for accurate content-based detection,
|
||||||
|
falls back to mimetypes module using filename extension.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
data: Raw bytes to analyze.
|
data: Raw bytes to analyze.
|
||||||
|
filename: Optional filename for extension-based fallback.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The detected MIME type.
|
The detected MIME type.
|
||||||
"""
|
"""
|
||||||
result: str = magic.from_buffer(data, mime=True)
|
try:
|
||||||
return result
|
import magic
|
||||||
|
|
||||||
|
result: str = magic.from_buffer(data, mime=True)
|
||||||
|
return result
|
||||||
|
except ImportError:
|
||||||
|
if filename:
|
||||||
|
mime_type, _ = mimetypes.guess_type(filename)
|
||||||
|
if mime_type:
|
||||||
|
return mime_type
|
||||||
|
return "application/octet-stream"
|
||||||
|
|
||||||
|
|
||||||
class _BinaryIOValidator:
|
class _BinaryIOValidator:
|
||||||
@@ -139,7 +152,7 @@ class FilePath(BaseModel):
|
|||||||
@property
|
@property
|
||||||
def content_type(self) -> str:
|
def content_type(self) -> str:
|
||||||
"""Get the content type by reading file content."""
|
"""Get the content type by reading file content."""
|
||||||
return detect_content_type(self.read())
|
return detect_content_type(self.read(), self.filename)
|
||||||
|
|
||||||
def read(self) -> bytes:
|
def read(self) -> bytes:
|
||||||
"""Read the file content from disk."""
|
"""Read the file content from disk."""
|
||||||
@@ -190,7 +203,7 @@ class FileBytes(BaseModel):
|
|||||||
@property
|
@property
|
||||||
def content_type(self) -> str:
|
def content_type(self) -> str:
|
||||||
"""Get the content type from the data."""
|
"""Get the content type from the data."""
|
||||||
return detect_content_type(self.data)
|
return detect_content_type(self.data, self.filename)
|
||||||
|
|
||||||
def read(self) -> bytes:
|
def read(self) -> bytes:
|
||||||
"""Return the bytes content."""
|
"""Return the bytes content."""
|
||||||
@@ -242,7 +255,7 @@ class FileStream(BaseModel):
|
|||||||
@property
|
@property
|
||||||
def content_type(self) -> str:
|
def content_type(self) -> str:
|
||||||
"""Get the content type from stream content."""
|
"""Get the content type from stream content."""
|
||||||
return detect_content_type(self.read())
|
return detect_content_type(self.read(), self.filename)
|
||||||
|
|
||||||
def read(self) -> bytes:
|
def read(self) -> bytes:
|
||||||
"""Read the stream content. Content is cached after first read."""
|
"""Read the stream content. Content is cached after first read."""
|
||||||
@@ -310,7 +323,7 @@ class AsyncFileStream(BaseModel):
|
|||||||
"""Get the content type from stream content. Requires aread() first."""
|
"""Get the content type from stream content. Requires aread() first."""
|
||||||
if self._content is None:
|
if self._content is None:
|
||||||
raise RuntimeError("Call aread() first to load content")
|
raise RuntimeError("Call aread() first to load content")
|
||||||
return detect_content_type(self._content)
|
return detect_content_type(self._content, self.filename)
|
||||||
|
|
||||||
async def aread(self) -> bytes:
|
async def aread(self) -> bytes:
|
||||||
"""Async read the stream content. Content is cached after first read."""
|
"""Async read the stream content. Content is cached after first read."""
|
||||||
|
|||||||
@@ -1,6 +1,99 @@
|
|||||||
"""Provider-specific file constraints for multimodal content."""
|
"""Provider-specific file constraints for multimodal content."""
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
|
||||||
|
ImageFormat = Literal[
|
||||||
|
"image/png",
|
||||||
|
"image/jpeg",
|
||||||
|
"image/gif",
|
||||||
|
"image/webp",
|
||||||
|
"image/heic",
|
||||||
|
"image/heif",
|
||||||
|
]
|
||||||
|
|
||||||
|
AudioFormat = Literal[
|
||||||
|
"audio/mp3",
|
||||||
|
"audio/mpeg",
|
||||||
|
"audio/wav",
|
||||||
|
"audio/ogg",
|
||||||
|
"audio/flac",
|
||||||
|
"audio/aac",
|
||||||
|
"audio/m4a",
|
||||||
|
"audio/opus",
|
||||||
|
]
|
||||||
|
|
||||||
|
VideoFormat = Literal[
|
||||||
|
"video/mp4",
|
||||||
|
"video/mpeg",
|
||||||
|
"video/webm",
|
||||||
|
"video/quicktime",
|
||||||
|
"video/x-msvideo",
|
||||||
|
"video/x-flv",
|
||||||
|
]
|
||||||
|
|
||||||
|
ProviderName = Literal[
|
||||||
|
"anthropic",
|
||||||
|
"openai",
|
||||||
|
"gemini",
|
||||||
|
"bedrock",
|
||||||
|
"azure",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Pre-typed format tuples for common combinations
|
||||||
|
DEFAULT_IMAGE_FORMATS: tuple[ImageFormat, ...] = (
|
||||||
|
"image/png",
|
||||||
|
"image/jpeg",
|
||||||
|
"image/gif",
|
||||||
|
"image/webp",
|
||||||
|
)
|
||||||
|
|
||||||
|
GEMINI_IMAGE_FORMATS: tuple[ImageFormat, ...] = (
|
||||||
|
"image/png",
|
||||||
|
"image/jpeg",
|
||||||
|
"image/gif",
|
||||||
|
"image/webp",
|
||||||
|
"image/heic",
|
||||||
|
"image/heif",
|
||||||
|
)
|
||||||
|
|
||||||
|
DEFAULT_AUDIO_FORMATS: tuple[AudioFormat, ...] = (
|
||||||
|
"audio/mp3",
|
||||||
|
"audio/mpeg",
|
||||||
|
"audio/wav",
|
||||||
|
"audio/ogg",
|
||||||
|
"audio/flac",
|
||||||
|
"audio/aac",
|
||||||
|
"audio/m4a",
|
||||||
|
)
|
||||||
|
|
||||||
|
GEMINI_AUDIO_FORMATS: tuple[AudioFormat, ...] = (
|
||||||
|
"audio/mp3",
|
||||||
|
"audio/mpeg",
|
||||||
|
"audio/wav",
|
||||||
|
"audio/ogg",
|
||||||
|
"audio/flac",
|
||||||
|
"audio/aac",
|
||||||
|
"audio/m4a",
|
||||||
|
"audio/opus",
|
||||||
|
)
|
||||||
|
|
||||||
|
DEFAULT_VIDEO_FORMATS: tuple[VideoFormat, ...] = (
|
||||||
|
"video/mp4",
|
||||||
|
"video/mpeg",
|
||||||
|
"video/webm",
|
||||||
|
"video/quicktime",
|
||||||
|
)
|
||||||
|
|
||||||
|
GEMINI_VIDEO_FORMATS: tuple[VideoFormat, ...] = (
|
||||||
|
"video/mp4",
|
||||||
|
"video/mpeg",
|
||||||
|
"video/webm",
|
||||||
|
"video/quicktime",
|
||||||
|
"video/x-msvideo",
|
||||||
|
"video/x-flv",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -19,12 +112,7 @@ class ImageConstraints:
|
|||||||
max_width: int | None = None
|
max_width: int | None = None
|
||||||
max_height: int | None = None
|
max_height: int | None = None
|
||||||
max_images_per_request: int | None = None
|
max_images_per_request: int | None = None
|
||||||
supported_formats: tuple[str, ...] = (
|
supported_formats: tuple[ImageFormat, ...] = DEFAULT_IMAGE_FORMATS
|
||||||
"image/png",
|
|
||||||
"image/jpeg",
|
|
||||||
"image/gif",
|
|
||||||
"image/webp",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -52,15 +140,7 @@ class AudioConstraints:
|
|||||||
|
|
||||||
max_size_bytes: int
|
max_size_bytes: int
|
||||||
max_duration_seconds: int | None = None
|
max_duration_seconds: int | None = None
|
||||||
supported_formats: tuple[str, ...] = (
|
supported_formats: tuple[AudioFormat, ...] = DEFAULT_AUDIO_FORMATS
|
||||||
"audio/mp3",
|
|
||||||
"audio/mpeg",
|
|
||||||
"audio/wav",
|
|
||||||
"audio/ogg",
|
|
||||||
"audio/flac",
|
|
||||||
"audio/aac",
|
|
||||||
"audio/m4a",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -75,12 +155,7 @@ class VideoConstraints:
|
|||||||
|
|
||||||
max_size_bytes: int
|
max_size_bytes: int
|
||||||
max_duration_seconds: int | None = None
|
max_duration_seconds: int | None = None
|
||||||
supported_formats: tuple[str, ...] = (
|
supported_formats: tuple[VideoFormat, ...] = DEFAULT_VIDEO_FORMATS
|
||||||
"video/mp4",
|
|
||||||
"video/mpeg",
|
|
||||||
"video/webm",
|
|
||||||
"video/quicktime",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -98,7 +173,7 @@ class ProviderConstraints:
|
|||||||
file_upload_threshold_bytes: Size threshold above which to use file upload.
|
file_upload_threshold_bytes: Size threshold above which to use file upload.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
name: str
|
name: ProviderName
|
||||||
image: ImageConstraints | None = None
|
image: ImageConstraints | None = None
|
||||||
pdf: PDFConstraints | None = None
|
pdf: PDFConstraints | None = None
|
||||||
audio: AudioConstraints | None = None
|
audio: AudioConstraints | None = None
|
||||||
@@ -114,7 +189,6 @@ ANTHROPIC_CONSTRAINTS = ProviderConstraints(
|
|||||||
max_size_bytes=5 * 1024 * 1024,
|
max_size_bytes=5 * 1024 * 1024,
|
||||||
max_width=8000,
|
max_width=8000,
|
||||||
max_height=8000,
|
max_height=8000,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
|
||||||
),
|
),
|
||||||
pdf=PDFConstraints(
|
pdf=PDFConstraints(
|
||||||
max_size_bytes=30 * 1024 * 1024,
|
max_size_bytes=30 * 1024 * 1024,
|
||||||
@@ -129,9 +203,7 @@ OPENAI_CONSTRAINTS = ProviderConstraints(
|
|||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=20 * 1024 * 1024,
|
max_size_bytes=20 * 1024 * 1024,
|
||||||
max_images_per_request=10,
|
max_images_per_request=10,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
|
||||||
),
|
),
|
||||||
pdf=None,
|
|
||||||
supports_file_upload=True,
|
supports_file_upload=True,
|
||||||
file_upload_threshold_bytes=5 * 1024 * 1024,
|
file_upload_threshold_bytes=5 * 1024 * 1024,
|
||||||
)
|
)
|
||||||
@@ -140,41 +212,18 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
|
|||||||
name="gemini",
|
name="gemini",
|
||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=100 * 1024 * 1024,
|
max_size_bytes=100 * 1024 * 1024,
|
||||||
supported_formats=(
|
supported_formats=GEMINI_IMAGE_FORMATS,
|
||||||
"image/png",
|
|
||||||
"image/jpeg",
|
|
||||||
"image/gif",
|
|
||||||
"image/webp",
|
|
||||||
"image/heic",
|
|
||||||
"image/heif",
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
pdf=PDFConstraints(
|
pdf=PDFConstraints(
|
||||||
max_size_bytes=50 * 1024 * 1024,
|
max_size_bytes=50 * 1024 * 1024,
|
||||||
),
|
),
|
||||||
audio=AudioConstraints(
|
audio=AudioConstraints(
|
||||||
max_size_bytes=100 * 1024 * 1024,
|
max_size_bytes=100 * 1024 * 1024,
|
||||||
supported_formats=(
|
supported_formats=GEMINI_AUDIO_FORMATS,
|
||||||
"audio/mp3",
|
|
||||||
"audio/mpeg",
|
|
||||||
"audio/wav",
|
|
||||||
"audio/ogg",
|
|
||||||
"audio/flac",
|
|
||||||
"audio/aac",
|
|
||||||
"audio/m4a",
|
|
||||||
"audio/opus",
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
video=VideoConstraints(
|
video=VideoConstraints(
|
||||||
max_size_bytes=2 * 1024 * 1024 * 1024,
|
max_size_bytes=2 * 1024 * 1024 * 1024,
|
||||||
supported_formats=(
|
supported_formats=GEMINI_VIDEO_FORMATS,
|
||||||
"video/mp4",
|
|
||||||
"video/mpeg",
|
|
||||||
"video/webm",
|
|
||||||
"video/quicktime",
|
|
||||||
"video/x-msvideo",
|
|
||||||
"video/x-flv",
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
supports_file_upload=True,
|
supports_file_upload=True,
|
||||||
file_upload_threshold_bytes=20 * 1024 * 1024,
|
file_upload_threshold_bytes=20 * 1024 * 1024,
|
||||||
@@ -186,7 +235,6 @@ BEDROCK_CONSTRAINTS = ProviderConstraints(
|
|||||||
max_size_bytes=4_608_000,
|
max_size_bytes=4_608_000,
|
||||||
max_width=8000,
|
max_width=8000,
|
||||||
max_height=8000,
|
max_height=8000,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
|
||||||
),
|
),
|
||||||
pdf=PDFConstraints(
|
pdf=PDFConstraints(
|
||||||
max_size_bytes=3_840_000,
|
max_size_bytes=3_840_000,
|
||||||
@@ -199,9 +247,7 @@ AZURE_CONSTRAINTS = ProviderConstraints(
|
|||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=20 * 1024 * 1024,
|
max_size_bytes=20 * 1024 * 1024,
|
||||||
max_images_per_request=10,
|
max_images_per_request=10,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
|
||||||
),
|
),
|
||||||
pdf=None,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -89,7 +89,8 @@ class FileProcessor:
|
|||||||
raise_on_error = mode == FileHandling.STRICT
|
raise_on_error = mode == FileHandling.STRICT
|
||||||
return validate_file(file, self.constraints, raise_on_error=raise_on_error)
|
return validate_file(file, self.constraints, raise_on_error=raise_on_error)
|
||||||
|
|
||||||
def _get_mode(self, file: FileInput) -> FileHandling:
|
@staticmethod
|
||||||
|
def _get_mode(file: FileInput) -> FileHandling:
|
||||||
"""Get the mode mode for a file.
|
"""Get the mode mode for a file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -201,32 +202,33 @@ class FileProcessor:
|
|||||||
"""
|
"""
|
||||||
semaphore = asyncio.Semaphore(max_concurrency)
|
semaphore = asyncio.Semaphore(max_concurrency)
|
||||||
|
|
||||||
async def process_one(
|
async def process_single(
|
||||||
name: str, file: FileInput
|
key: str, input_file: FileInput
|
||||||
) -> tuple[str, FileInput | Sequence[FileInput]]:
|
) -> tuple[str, FileInput | Sequence[FileInput]]:
|
||||||
|
"""Process a single file with semaphore limiting."""
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
processed = await loop.run_in_executor(None, self.process, file)
|
result = await loop.run_in_executor(None, self.process, input_file)
|
||||||
return name, processed
|
return key, result
|
||||||
|
|
||||||
tasks = [process_one(n, f) for n, f in files.items()]
|
tasks = [process_single(n, f) for n, f in files.items()]
|
||||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
gather_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
output: dict[str, FileInput] = {}
|
output: dict[str, FileInput] = {}
|
||||||
for result in results:
|
for item in gather_results:
|
||||||
if isinstance(result, BaseException):
|
if isinstance(item, BaseException):
|
||||||
logger.error(f"Processing failed: {result}")
|
logger.error(f"Processing failed: {item}")
|
||||||
continue
|
continue
|
||||||
name, processed = result
|
entry_name, processed = item
|
||||||
if isinstance(processed, Sequence) and not isinstance(
|
if isinstance(processed, Sequence) and not isinstance(
|
||||||
processed, (str, bytes)
|
processed, (str, bytes)
|
||||||
):
|
):
|
||||||
for i, chunk in enumerate(processed):
|
for i, chunk in enumerate(processed):
|
||||||
output[f"{name}_chunk_{i}"] = chunk
|
output[f"{entry_name}_chunk_{i}"] = chunk
|
||||||
elif isinstance(
|
elif isinstance(
|
||||||
processed, (AudioFile, File, ImageFile, PDFFile, TextFile, VideoFile)
|
processed, (AudioFile, File, ImageFile, PDFFile, TextFile, VideoFile)
|
||||||
):
|
):
|
||||||
output[name] = processed
|
output[entry_name] = processed
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|||||||
@@ -305,7 +305,7 @@ def get_image_dimensions(file: ImageFile) -> tuple[int, int] | None:
|
|||||||
try:
|
try:
|
||||||
with Image.open(io.BytesIO(content)) as img:
|
with Image.open(io.BytesIO(content)) as img:
|
||||||
width, height = img.size
|
width, height = img.size
|
||||||
return (width, height)
|
return width, height
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to get image dimensions: {e}")
|
logger.warning(f"Failed to get image dimensions: {e}")
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -16,7 +16,11 @@ from crewai.files.content_types import (
|
|||||||
)
|
)
|
||||||
from crewai.files.metrics import measure_operation
|
from crewai.files.metrics import measure_operation
|
||||||
from crewai.files.processing.constraints import (
|
from crewai.files.processing.constraints import (
|
||||||
|
AudioConstraints,
|
||||||
|
ImageConstraints,
|
||||||
|
PDFConstraints,
|
||||||
ProviderConstraints,
|
ProviderConstraints,
|
||||||
|
VideoConstraints,
|
||||||
get_constraints_for_provider,
|
get_constraints_for_provider,
|
||||||
)
|
)
|
||||||
from crewai.files.resolved import (
|
from crewai.files.resolved import (
|
||||||
@@ -91,7 +95,8 @@ class FileResolver:
|
|||||||
upload_cache: UploadCache | None = None
|
upload_cache: UploadCache | None = None
|
||||||
_uploaders: dict[str, FileUploader] = field(default_factory=dict)
|
_uploaders: dict[str, FileUploader] = field(default_factory=dict)
|
||||||
|
|
||||||
def _build_file_context(self, file: FileInput) -> FileContext:
|
@staticmethod
|
||||||
|
def _build_file_context(file: FileInput) -> FileContext:
|
||||||
"""Build context by reading file once.
|
"""Build context by reading file once.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -149,6 +154,30 @@ class FileResolver:
|
|||||||
"""
|
"""
|
||||||
return {name: self.resolve(file, provider) for name, file in files.items()}
|
return {name: self.resolve(file, provider) for name, file in files.items()}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_type_constraint(
|
||||||
|
content_type: str,
|
||||||
|
constraints: ProviderConstraints,
|
||||||
|
) -> ImageConstraints | PDFConstraints | AudioConstraints | VideoConstraints | None:
|
||||||
|
"""Get type-specific constraint based on content type.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content_type: MIME type of the file.
|
||||||
|
constraints: Provider constraints.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Type-specific constraint or None if not found.
|
||||||
|
"""
|
||||||
|
if content_type.startswith("image/"):
|
||||||
|
return constraints.image
|
||||||
|
if content_type == "application/pdf":
|
||||||
|
return constraints.pdf
|
||||||
|
if content_type.startswith("audio/"):
|
||||||
|
return constraints.audio
|
||||||
|
if content_type.startswith("video/"):
|
||||||
|
return constraints.video
|
||||||
|
return None
|
||||||
|
|
||||||
def _should_upload(
|
def _should_upload(
|
||||||
self,
|
self,
|
||||||
file: FileInput,
|
file: FileInput,
|
||||||
@@ -158,6 +187,10 @@ class FileResolver:
|
|||||||
) -> bool:
|
) -> bool:
|
||||||
"""Determine if a file should be uploaded rather than inlined.
|
"""Determine if a file should be uploaded rather than inlined.
|
||||||
|
|
||||||
|
Uses type-specific constraints to make smarter decisions:
|
||||||
|
- Checks if file exceeds type-specific inline size limits
|
||||||
|
- Falls back to general threshold if no type-specific constraint
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file: The file to check.
|
file: The file to check.
|
||||||
provider: Provider name.
|
provider: Provider name.
|
||||||
@@ -173,8 +206,21 @@ class FileResolver:
|
|||||||
if self.config.prefer_upload:
|
if self.config.prefer_upload:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
content_type = file.content_type
|
||||||
|
type_constraint = self._get_type_constraint(content_type, constraints)
|
||||||
|
|
||||||
|
if type_constraint is not None:
|
||||||
|
# Check if file exceeds type-specific inline limit
|
||||||
|
if file_size > type_constraint.max_size_bytes:
|
||||||
|
logger.debug(
|
||||||
|
f"File {file.filename} ({file_size}B) exceeds {content_type} "
|
||||||
|
f"inline limit ({type_constraint.max_size_bytes}B) for {provider}"
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Fall back to general threshold
|
||||||
threshold = self.config.upload_threshold_bytes
|
threshold = self.config.upload_threshold_bytes
|
||||||
if threshold is None and constraints is not None:
|
if threshold is None:
|
||||||
threshold = constraints.file_upload_threshold_bytes
|
threshold = constraints.file_upload_threshold_bytes
|
||||||
|
|
||||||
if threshold is not None and file_size > threshold:
|
if threshold is not None and file_size > threshold:
|
||||||
@@ -239,8 +285,8 @@ class FileResolver:
|
|||||||
file_uri=result.file_uri,
|
file_uri=result.file_uri,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
def _upload_with_retry(
|
def _upload_with_retry(
|
||||||
self,
|
|
||||||
uploader: FileUploader,
|
uploader: FileUploader,
|
||||||
file: FileInput,
|
file: FileInput,
|
||||||
provider: str,
|
provider: str,
|
||||||
@@ -312,13 +358,14 @@ class FileResolver:
|
|||||||
"""Resolve a file as inline content.
|
"""Resolve a file as inline content.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file: The file to resolve.
|
file: The file to resolve (used for logging).
|
||||||
provider: Provider name.
|
provider: Provider name.
|
||||||
context: Pre-computed file context.
|
context: Pre-computed file context.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
InlineBase64 or InlineBytes depending on provider.
|
InlineBase64 or InlineBytes depending on provider.
|
||||||
"""
|
"""
|
||||||
|
logger.debug(f"Resolving {file.filename} as inline for {provider}")
|
||||||
if self.config.use_bytes_for_bedrock and "bedrock" in provider:
|
if self.config.use_bytes_for_bedrock and "bedrock" in provider:
|
||||||
return InlineBytes(
|
return InlineBytes(
|
||||||
content_type=context.content_type,
|
content_type=context.content_type,
|
||||||
@@ -374,21 +421,24 @@ class FileResolver:
|
|||||||
"""
|
"""
|
||||||
semaphore = asyncio.Semaphore(max_concurrency)
|
semaphore = asyncio.Semaphore(max_concurrency)
|
||||||
|
|
||||||
async def resolve_one(name: str, file: FileInput) -> tuple[str, ResolvedFile]:
|
async def resolve_single(
|
||||||
|
entry_key: str, input_file: FileInput
|
||||||
|
) -> tuple[str, ResolvedFile]:
|
||||||
|
"""Resolve a single file with semaphore limiting."""
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
resolved = await self.aresolve(file, provider)
|
entry_resolved = await self.aresolve(input_file, provider)
|
||||||
return name, resolved
|
return entry_key, entry_resolved
|
||||||
|
|
||||||
tasks = [resolve_one(n, f) for n, f in files.items()]
|
tasks = [resolve_single(n, f) for n, f in files.items()]
|
||||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
gather_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
output: dict[str, ResolvedFile] = {}
|
output: dict[str, ResolvedFile] = {}
|
||||||
for result in results:
|
for item in gather_results:
|
||||||
if isinstance(result, BaseException):
|
if isinstance(item, BaseException):
|
||||||
logger.error(f"Resolution failed: {result}")
|
logger.error(f"Resolution failed: {item}")
|
||||||
continue
|
continue
|
||||||
name, resolved = result
|
key, resolved = item
|
||||||
output[name] = resolved
|
output[key] = resolved
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@@ -451,8 +501,8 @@ class FileResolver:
|
|||||||
file_uri=result.file_uri,
|
file_uri=result.file_uri,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
async def _aupload_with_retry(
|
async def _aupload_with_retry(
|
||||||
self,
|
|
||||||
uploader: FileUploader,
|
uploader: FileUploader,
|
||||||
file: FileInput,
|
file: FileInput,
|
||||||
provider: str,
|
provider: str,
|
||||||
@@ -559,17 +609,24 @@ def create_resolver(
|
|||||||
"""Create a configured FileResolver.
|
"""Create a configured FileResolver.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
provider: Optional provider name for provider-specific configuration.
|
provider: Optional provider name to load default threshold from constraints.
|
||||||
prefer_upload: Whether to prefer upload over inline.
|
prefer_upload: Whether to prefer upload over inline.
|
||||||
upload_threshold_bytes: Size threshold for using upload.
|
upload_threshold_bytes: Size threshold for using upload. If None and
|
||||||
|
provider is specified, uses provider's default threshold.
|
||||||
enable_cache: Whether to enable upload caching.
|
enable_cache: Whether to enable upload caching.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Configured FileResolver instance.
|
Configured FileResolver instance.
|
||||||
"""
|
"""
|
||||||
|
threshold = upload_threshold_bytes
|
||||||
|
if threshold is None and provider is not None:
|
||||||
|
constraints = get_constraints_for_provider(provider)
|
||||||
|
if constraints is not None:
|
||||||
|
threshold = constraints.file_upload_threshold_bytes
|
||||||
|
|
||||||
config = FileResolverConfig(
|
config = FileResolverConfig(
|
||||||
prefer_upload=prefer_upload,
|
prefer_upload=prefer_upload,
|
||||||
upload_threshold_bytes=upload_threshold_bytes,
|
upload_threshold_bytes=threshold,
|
||||||
)
|
)
|
||||||
|
|
||||||
cache = UploadCache() if enable_cache else None
|
cache = UploadCache() if enable_cache else None
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ if TYPE_CHECKING:
|
|||||||
|
|
||||||
FileInput = AudioFile | File | ImageFile | PDFFile | TextFile | VideoFile
|
FileInput = AudioFile | File | ImageFile | PDFFile | TextFile | VideoFile
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
DEFAULT_TTL_SECONDS = 24 * 60 * 60 # 24 hours
|
DEFAULT_TTL_SECONDS = 24 * 60 * 60 # 24 hours
|
||||||
@@ -139,7 +138,6 @@ class UploadCache:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self._cache = Cache(
|
self._cache = Cache(
|
||||||
Cache.MEMORY,
|
|
||||||
serializer=PickleSerializer(),
|
serializer=PickleSerializer(),
|
||||||
namespace=namespace,
|
namespace=namespace,
|
||||||
)
|
)
|
||||||
@@ -406,7 +404,8 @@ class UploadCache:
|
|||||||
results.append(cached)
|
results.append(cached)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def _run_sync(self, coro: Any) -> Any:
|
@staticmethod
|
||||||
|
def _run_sync(coro: Any) -> Any:
|
||||||
"""Run an async coroutine from sync context without blocking event loop."""
|
"""Run an async coroutine from sync context without blocking event loop."""
|
||||||
try:
|
try:
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
@@ -549,7 +548,7 @@ def _cleanup_on_exit() -> None:
|
|||||||
from crewai.files.cleanup import cleanup_uploaded_files
|
from crewai.files.cleanup import cleanup_uploaded_files
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cleanup_uploaded_files(_default_cache, delete_from_provider=True)
|
cleanup_uploaded_files(_default_cache)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Error during exit cleanup: {e}")
|
logger.debug(f"Error during exit cleanup: {e}")
|
||||||
|
|
||||||
|
|||||||
@@ -204,7 +204,8 @@ class BedrockFileUploader(FileUploader):
|
|||||||
"""
|
"""
|
||||||
return f"s3://{self.bucket_name}/{key}"
|
return f"s3://{self.bucket_name}/{key}"
|
||||||
|
|
||||||
def _get_transfer_config(self) -> Any:
|
@staticmethod
|
||||||
|
def _get_transfer_config() -> Any:
|
||||||
"""Get boto3 TransferConfig for multipart uploads."""
|
"""Get boto3 TransferConfig for multipart uploads."""
|
||||||
from boto3.s3.transfer import TransferConfig
|
from boto3.s3.transfer import TransferConfig
|
||||||
|
|
||||||
|
|||||||
@@ -388,7 +388,8 @@ class OpenAIFileUploader(FileUploader):
|
|||||||
logger.debug(f"Failed to cancel upload: {cancel_err}")
|
logger.debug(f"Failed to cancel upload: {cancel_err}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _classify_error(self, e: Exception, filename: str | None) -> Exception:
|
@staticmethod
|
||||||
|
def _classify_error(e: Exception, filename: str | None) -> Exception:
|
||||||
"""Classify an exception as transient or permanent.
|
"""Classify an exception as transient or permanent.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|||||||
Reference in New Issue
Block a user