refactor: fix IDE warnings and add Literal types to constraints

- Add Literal types for ImageFormat, AudioFormat, VideoFormat, ProviderName
- Convert methods to @staticmethod where appropriate
- Remove redundant default parameter values
- Fix variable shadowing in nested functions
- Make magic import optional with mimetypes fallback
- Add docstrings to inner functions
Greyson LaLonde
2026-01-22 02:54:29 -05:00
parent 1353cb2a33
commit 0a250a45ce
9 changed files with 239 additions and 115 deletions

View File

@@ -237,9 +237,12 @@ async def acleanup_uploaded_files(
     if delete_from_provider:
         semaphore = asyncio.Semaphore(max_concurrency)

-        async def delete_one(uploader: FileUploader, upload: CachedUpload) -> bool:
+        async def delete_one(file_uploader: FileUploader, cached: CachedUpload) -> bool:
+            """Delete a single file with semaphore limiting."""
             async with semaphore:
-                return await _asafe_delete(uploader, upload.file_id, upload.provider)
+                return await _asafe_delete(
+                    file_uploader, cached.file_id, cached.provider
+                )

         tasks: list[asyncio.Task[bool]] = []
         for provider, uploads in provider_uploads.items():
@@ -251,7 +254,7 @@ async def acleanup_uploaded_files(
                 continue
             tasks.extend(
-                asyncio.create_task(delete_one(uploader, upload)) for upload in uploads
+                asyncio.create_task(delete_one(uploader, cached)) for cached in uploads
             )

     results = await asyncio.gather(*tasks, return_exceptions=True)
@@ -291,19 +294,20 @@ async def acleanup_expired_files(
     if delete_from_provider and expired_entries:
         semaphore = asyncio.Semaphore(max_concurrency)

-        async def delete_expired(upload: CachedUpload) -> None:
+        async def delete_expired(cached: CachedUpload) -> None:
+            """Delete an expired file with semaphore limiting."""
             async with semaphore:
-                uploader = get_uploader(upload.provider)
-                if uploader is not None:
+                file_uploader = get_uploader(cached.provider)
+                if file_uploader is not None:
                     try:
-                        await uploader.adelete(upload.file_id)
+                        await file_uploader.adelete(cached.file_id)
                     except Exception as e:
                         logger.debug(
-                            f"Could not delete expired file {upload.file_id}: {e}"
+                            f"Could not delete expired file {cached.file_id}: {e}"
                         )

         await asyncio.gather(
-            *[delete_expired(upload) for upload in expired_entries],
+            *[delete_expired(cached) for cached in expired_entries],
             return_exceptions=True,
         )
@@ -337,18 +341,19 @@ async def acleanup_provider_files(
     semaphore = asyncio.Semaphore(max_concurrency)

-    async def delete_file(file_id: str) -> bool:
+    async def delete_single(target_file_id: str) -> bool:
+        """Delete a single file with semaphore limiting."""
         async with semaphore:
-            return await uploader.adelete(file_id)
+            return await uploader.adelete(target_file_id)

     if delete_all_from_provider:
         try:
             files = uploader.list_files()
             tasks = []
             for file_info in files:
-                file_id = file_info.get("id") or file_info.get("name")
-                if file_id:
-                    tasks.append(delete_file(file_id))
+                fid = file_info.get("id") or file_info.get("name")
+                if fid:
+                    tasks.append(delete_single(fid))
             results = await asyncio.gather(*tasks, return_exceptions=True)
             deleted = sum(1 for r in results if r is True)
         except Exception as e:
@@ -357,7 +362,7 @@ async def acleanup_provider_files(
         uploads = await cache.aget_all_for_provider(provider)
         tasks = []
         for upload in uploads:
-            tasks.append(delete_file(upload.file_id))
+            tasks.append(delete_single(upload.file_id))
         results = await asyncio.gather(*tasks, return_exceptions=True)
         for upload, result in zip(uploads, results, strict=False):
             if result is True:
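
All three cleanup helpers above share one concurrency idiom: a shared asyncio.Semaphore caps in-flight provider deletions, and asyncio.gather(..., return_exceptions=True) keeps a single failed delete from cancelling the rest. A minimal, self-contained sketch of that idiom (the adelete stub here is hypothetical, standing in for uploader.adelete):

import asyncio

async def demo(file_ids: list[str], max_concurrency: int = 5) -> int:
    semaphore = asyncio.Semaphore(max_concurrency)

    async def adelete(file_id: str) -> bool:
        # Hypothetical stand-in for a provider delete call.
        await asyncio.sleep(0.01)
        return True

    async def delete_one(file_id: str) -> bool:
        # At most max_concurrency deletions hold the semaphore at once.
        async with semaphore:
            return await adelete(file_id)

    results = await asyncio.gather(
        *(delete_one(fid) for fid in file_ids),
        return_exceptions=True,  # exceptions are returned, not raised
    )
    # Count successes exactly as the cleanup helpers do.
    return sum(1 for r in results if r is True)

print(asyncio.run(demo([f"file-{i}" for i in range(20)])))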

View File

@@ -3,11 +3,11 @@
 from __future__ import annotations

 from collections.abc import AsyncIterator, Iterator
+import mimetypes
 from pathlib import Path
 from typing import Annotated, Any, BinaryIO, Protocol, cast, runtime_checkable

 import aiofiles
-import magic
 from pydantic import (
     BaseModel,
     BeforeValidator,
@@ -52,17 +52,30 @@ ValidatedAsyncReadable = Annotated[AsyncReadable, _AsyncReadableValidator()]
 DEFAULT_MAX_FILE_SIZE_BYTES = 500 * 1024 * 1024  # 500MB


-def detect_content_type(data: bytes) -> str:
+def detect_content_type(data: bytes, filename: str | None = None) -> str:
     """Detect MIME type from file content.

+    Uses python-magic if available for accurate content-based detection,
+    falls back to mimetypes module using filename extension.
+
     Args:
         data: Raw bytes to analyze.
+        filename: Optional filename for extension-based fallback.

     Returns:
         The detected MIME type.
     """
-    result: str = magic.from_buffer(data, mime=True)
-    return result
+    try:
+        import magic

+        result: str = magic.from_buffer(data, mime=True)
+        return result
+    except ImportError:
+        if filename:
+            mime_type, _ = mimetypes.guess_type(filename)
+            if mime_type:
+                return mime_type
+        return "application/octet-stream"


 class _BinaryIOValidator:
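
With the import made optional, the behaviour of detect_content_type depends on the environment: python-magic sniffs the bytes when installed, otherwise mimetypes guesses from the filename extension, and application/octet-stream is the last resort. A quick illustration of the two paths:

png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 16

# python-magic installed: sniffed from content -> "image/png"
# not installed: extension fallback via mimetypes -> "image/png"
print(detect_content_type(png_bytes, filename="chart.png"))

# No filename and no python-magic -> "application/octet-stream"
print(detect_content_type(b"\x00\x01\x02"))
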
@@ -139,7 +152,7 @@ class FilePath(BaseModel):
     @property
     def content_type(self) -> str:
         """Get the content type by reading file content."""
-        return detect_content_type(self.read())
+        return detect_content_type(self.read(), self.filename)

     def read(self) -> bytes:
         """Read the file content from disk."""
@@ -190,7 +203,7 @@ class FileBytes(BaseModel):
     @property
     def content_type(self) -> str:
         """Get the content type from the data."""
-        return detect_content_type(self.data)
+        return detect_content_type(self.data, self.filename)

     def read(self) -> bytes:
         """Return the bytes content."""
@@ -242,7 +255,7 @@ class FileStream(BaseModel):
     @property
     def content_type(self) -> str:
         """Get the content type from stream content."""
-        return detect_content_type(self.read())
+        return detect_content_type(self.read(), self.filename)

     def read(self) -> bytes:
         """Read the stream content. Content is cached after first read."""
@@ -310,7 +323,7 @@ class AsyncFileStream(BaseModel):
         """Get the content type from stream content. Requires aread() first."""
         if self._content is None:
             raise RuntimeError("Call aread() first to load content")
-        return detect_content_type(self._content)
+        return detect_content_type(self._content, self.filename)

     async def aread(self) -> bytes:
         """Async read the stream content. Content is cached after first read."""

View File

@@ -1,6 +1,99 @@
 """Provider-specific file constraints for multimodal content."""

 from dataclasses import dataclass
+from typing import Literal
+
+ImageFormat = Literal[
+    "image/png",
+    "image/jpeg",
+    "image/gif",
+    "image/webp",
+    "image/heic",
+    "image/heif",
+]
+
+AudioFormat = Literal[
+    "audio/mp3",
+    "audio/mpeg",
+    "audio/wav",
+    "audio/ogg",
+    "audio/flac",
+    "audio/aac",
+    "audio/m4a",
+    "audio/opus",
+]
+
+VideoFormat = Literal[
+    "video/mp4",
+    "video/mpeg",
+    "video/webm",
+    "video/quicktime",
+    "video/x-msvideo",
+    "video/x-flv",
+]
+
+ProviderName = Literal[
+    "anthropic",
+    "openai",
+    "gemini",
+    "bedrock",
+    "azure",
+]
+
+# Pre-typed format tuples for common combinations
+DEFAULT_IMAGE_FORMATS: tuple[ImageFormat, ...] = (
+    "image/png",
+    "image/jpeg",
+    "image/gif",
+    "image/webp",
+)
+
+GEMINI_IMAGE_FORMATS: tuple[ImageFormat, ...] = (
+    "image/png",
+    "image/jpeg",
+    "image/gif",
+    "image/webp",
+    "image/heic",
+    "image/heif",
+)
+
+DEFAULT_AUDIO_FORMATS: tuple[AudioFormat, ...] = (
+    "audio/mp3",
+    "audio/mpeg",
+    "audio/wav",
+    "audio/ogg",
+    "audio/flac",
+    "audio/aac",
+    "audio/m4a",
+)
+
+GEMINI_AUDIO_FORMATS: tuple[AudioFormat, ...] = (
+    "audio/mp3",
+    "audio/mpeg",
+    "audio/wav",
+    "audio/ogg",
+    "audio/flac",
+    "audio/aac",
+    "audio/m4a",
+    "audio/opus",
+)
+
+DEFAULT_VIDEO_FORMATS: tuple[VideoFormat, ...] = (
+    "video/mp4",
+    "video/mpeg",
+    "video/webm",
+    "video/quicktime",
+)
+
+GEMINI_VIDEO_FORMATS: tuple[VideoFormat, ...] = (
+    "video/mp4",
+    "video/mpeg",
+    "video/webm",
+    "video/quicktime",
+    "video/x-msvideo",
+    "video/x-flv",
+)


 @dataclass(frozen=True)
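
The Literal aliases cost nothing at runtime (the values stay plain strings) but let a static type checker reject MIME strings outside the allowed set, for example:

ok: tuple[ImageFormat, ...] = ("image/png", "image/webp")

# mypy/pyright flag this: "image/bmp" is not a member of ImageFormat
bad: tuple[ImageFormat, ...] = ("image/bmp",)  # type checker error
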
@@ -19,12 +112,7 @@ class ImageConstraints:
     max_width: int | None = None
     max_height: int | None = None
     max_images_per_request: int | None = None
-    supported_formats: tuple[str, ...] = (
-        "image/png",
-        "image/jpeg",
-        "image/gif",
-        "image/webp",
-    )
+    supported_formats: tuple[ImageFormat, ...] = DEFAULT_IMAGE_FORMATS


 @dataclass(frozen=True)
@@ -52,15 +140,7 @@ class AudioConstraints:
     max_size_bytes: int
     max_duration_seconds: int | None = None
-    supported_formats: tuple[str, ...] = (
-        "audio/mp3",
-        "audio/mpeg",
-        "audio/wav",
-        "audio/ogg",
-        "audio/flac",
-        "audio/aac",
-        "audio/m4a",
-    )
+    supported_formats: tuple[AudioFormat, ...] = DEFAULT_AUDIO_FORMATS


 @dataclass(frozen=True)
@@ -75,12 +155,7 @@ class VideoConstraints:
     max_size_bytes: int
     max_duration_seconds: int | None = None
-    supported_formats: tuple[str, ...] = (
-        "video/mp4",
-        "video/mpeg",
-        "video/webm",
-        "video/quicktime",
-    )
+    supported_formats: tuple[VideoFormat, ...] = DEFAULT_VIDEO_FORMATS


 @dataclass(frozen=True)
@@ -98,7 +173,7 @@ class ProviderConstraints:
         file_upload_threshold_bytes: Size threshold above which to use file upload.
     """

-    name: str
+    name: ProviderName
     image: ImageConstraints | None = None
     pdf: PDFConstraints | None = None
     audio: AudioConstraints | None = None
@@ -114,7 +189,6 @@ ANTHROPIC_CONSTRAINTS = ProviderConstraints(
         max_size_bytes=5 * 1024 * 1024,
         max_width=8000,
         max_height=8000,
-        supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
     ),
     pdf=PDFConstraints(
         max_size_bytes=30 * 1024 * 1024,
@@ -129,9 +203,7 @@ OPENAI_CONSTRAINTS = ProviderConstraints(
     image=ImageConstraints(
         max_size_bytes=20 * 1024 * 1024,
         max_images_per_request=10,
-        supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
     ),
-    pdf=None,
     supports_file_upload=True,
     file_upload_threshold_bytes=5 * 1024 * 1024,
 )
@@ -140,41 +212,18 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
     name="gemini",
     image=ImageConstraints(
         max_size_bytes=100 * 1024 * 1024,
-        supported_formats=(
-            "image/png",
-            "image/jpeg",
-            "image/gif",
-            "image/webp",
-            "image/heic",
-            "image/heif",
-        ),
+        supported_formats=GEMINI_IMAGE_FORMATS,
     ),
     pdf=PDFConstraints(
         max_size_bytes=50 * 1024 * 1024,
     ),
     audio=AudioConstraints(
         max_size_bytes=100 * 1024 * 1024,
-        supported_formats=(
-            "audio/mp3",
-            "audio/mpeg",
-            "audio/wav",
-            "audio/ogg",
-            "audio/flac",
-            "audio/aac",
-            "audio/m4a",
-            "audio/opus",
-        ),
+        supported_formats=GEMINI_AUDIO_FORMATS,
     ),
     video=VideoConstraints(
         max_size_bytes=2 * 1024 * 1024 * 1024,
-        supported_formats=(
-            "video/mp4",
-            "video/mpeg",
-            "video/webm",
-            "video/quicktime",
-            "video/x-msvideo",
-            "video/x-flv",
-        ),
+        supported_formats=GEMINI_VIDEO_FORMATS,
     ),
     supports_file_upload=True,
     file_upload_threshold_bytes=20 * 1024 * 1024,
@@ -186,7 +235,6 @@ BEDROCK_CONSTRAINTS = ProviderConstraints(
         max_size_bytes=4_608_000,
         max_width=8000,
         max_height=8000,
-        supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
     ),
     pdf=PDFConstraints(
         max_size_bytes=3_840_000,
@@ -199,9 +247,7 @@ AZURE_CONSTRAINTS = ProviderConstraints(
     image=ImageConstraints(
         max_size_bytes=20 * 1024 * 1024,
         max_images_per_request=10,
-        supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
     ),
-    pdf=None,
 )
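
Hoisting the format tuples keeps the provider tables short without losing queryability; a small sketch against the definitions above:

# Gemini accepts HEIC; the default image set does not.
assert "image/heic" in GEMINI_IMAGE_FORMATS
assert "image/heic" not in DEFAULT_IMAGE_FORMATS

# Constraint values stay reachable through the provider objects
# (image is set for GEMINI_CONSTRAINTS, as shown above).
assert GEMINI_CONSTRAINTS.image is not None
print(GEMINI_CONSTRAINTS.image.max_size_bytes)  # 104857600 (100 MB)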

View File

@@ -89,7 +89,8 @@ class FileProcessor:
         raise_on_error = mode == FileHandling.STRICT
         return validate_file(file, self.constraints, raise_on_error=raise_on_error)

-    def _get_mode(self, file: FileInput) -> FileHandling:
+    @staticmethod
+    def _get_mode(file: FileInput) -> FileHandling:
         """Get the mode mode for a file.

         Args:
@@ -201,32 +202,33 @@ class FileProcessor:
         """
         semaphore = asyncio.Semaphore(max_concurrency)

-        async def process_one(
-            name: str, file: FileInput
+        async def process_single(
+            key: str, input_file: FileInput
         ) -> tuple[str, FileInput | Sequence[FileInput]]:
+            """Process a single file with semaphore limiting."""
             async with semaphore:
                 loop = asyncio.get_running_loop()
-                processed = await loop.run_in_executor(None, self.process, file)
-                return name, processed
+                result = await loop.run_in_executor(None, self.process, input_file)
+                return key, result

-        tasks = [process_one(n, f) for n, f in files.items()]
-        results = await asyncio.gather(*tasks, return_exceptions=True)
+        tasks = [process_single(n, f) for n, f in files.items()]
+        gather_results = await asyncio.gather(*tasks, return_exceptions=True)

         output: dict[str, FileInput] = {}
-        for result in results:
-            if isinstance(result, BaseException):
-                logger.error(f"Processing failed: {result}")
+        for item in gather_results:
+            if isinstance(item, BaseException):
+                logger.error(f"Processing failed: {item}")
                 continue
-            name, processed = result
+            entry_name, processed = item
             if isinstance(processed, Sequence) and not isinstance(
                 processed, (str, bytes)
             ):
                 for i, chunk in enumerate(processed):
-                    output[f"{name}_chunk_{i}"] = chunk
+                    output[f"{entry_name}_chunk_{i}"] = chunk
             elif isinstance(
                 processed, (AudioFile, File, ImageFile, PDFFile, TextFile, VideoFile)
             ):
-                output[name] = processed
+                output[entry_name] = processed

         return output
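
The renames clarify the post-gather bookkeeping: with return_exceptions=True, each gathered item is either the (key, processed) tuple or the raised exception object, so isinstance(item, BaseException) is the success/failure split. The filtering step in isolation (a standalone sketch, not the class method itself):

import asyncio

async def main() -> None:
    async def ok(key: str) -> tuple[str, str]:
        return key, f"processed-{key}"

    async def boom(key: str) -> tuple[str, str]:
        raise ValueError(f"cannot process {key}")

    gather_results = await asyncio.gather(ok("a"), boom("b"), return_exceptions=True)

    output: dict[str, str] = {}
    for item in gather_results:
        if isinstance(item, BaseException):  # failed task: log and skip
            print(f"Processing failed: {item}")
            continue
        key, processed = item  # successful task: unpack the tuple
        output[key] = processed

    print(output)  # {'a': 'processed-a'}

asyncio.run(main())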

View File

@@ -305,7 +305,7 @@ def get_image_dimensions(file: ImageFile) -> tuple[int, int] | None:
     try:
         with Image.open(io.BytesIO(content)) as img:
             width, height = img.size
-            return (width, height)
+            return width, height
     except Exception as e:
         logger.warning(f"Failed to get image dimensions: {e}")
         return None

View File

@@ -16,7 +16,11 @@ from crewai.files.content_types import (
 )
 from crewai.files.metrics import measure_operation
 from crewai.files.processing.constraints import (
+    AudioConstraints,
+    ImageConstraints,
+    PDFConstraints,
     ProviderConstraints,
+    VideoConstraints,
     get_constraints_for_provider,
 )
 from crewai.files.resolved import (
from crewai.files.resolved import ( from crewai.files.resolved import (
@@ -91,7 +95,8 @@ class FileResolver:
     upload_cache: UploadCache | None = None
     _uploaders: dict[str, FileUploader] = field(default_factory=dict)

-    def _build_file_context(self, file: FileInput) -> FileContext:
+    @staticmethod
+    def _build_file_context(file: FileInput) -> FileContext:
         """Build context by reading file once.

         Args:
@@ -149,6 +154,30 @@ class FileResolver:
         """
         return {name: self.resolve(file, provider) for name, file in files.items()}

+    @staticmethod
+    def _get_type_constraint(
+        content_type: str,
+        constraints: ProviderConstraints,
+    ) -> ImageConstraints | PDFConstraints | AudioConstraints | VideoConstraints | None:
+        """Get type-specific constraint based on content type.
+
+        Args:
+            content_type: MIME type of the file.
+            constraints: Provider constraints.
+
+        Returns:
+            Type-specific constraint or None if not found.
+        """
+        if content_type.startswith("image/"):
+            return constraints.image
+        if content_type == "application/pdf":
+            return constraints.pdf
+        if content_type.startswith("audio/"):
+            return constraints.audio
+        if content_type.startswith("video/"):
+            return constraints.video
+        return None
+
     def _should_upload(
         self,
         file: FileInput,
@@ -158,6 +187,10 @@ class FileResolver:
     ) -> bool:
         """Determine if a file should be uploaded rather than inlined.

+        Uses type-specific constraints to make smarter decisions:
+        - Checks if file exceeds type-specific inline size limits
+        - Falls back to general threshold if no type-specific constraint
+
         Args:
             file: The file to check.
             provider: Provider name.
@@ -173,8 +206,21 @@ class FileResolver:
         if self.config.prefer_upload:
             return True

+        content_type = file.content_type
+        type_constraint = self._get_type_constraint(content_type, constraints)
+        if type_constraint is not None:
+            # Check if file exceeds type-specific inline limit
+            if file_size > type_constraint.max_size_bytes:
+                logger.debug(
+                    f"File {file.filename} ({file_size}B) exceeds {content_type} "
+                    f"inline limit ({type_constraint.max_size_bytes}B) for {provider}"
+                )
+                return True
+
+        # Fall back to general threshold
         threshold = self.config.upload_threshold_bytes
-        if threshold is None and constraints is not None:
+        if threshold is None:
             threshold = constraints.file_upload_threshold_bytes

         if threshold is not None and file_size > threshold:
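
Worked through with the Gemini numbers defined earlier: a 150 MB image exceeds the 100 MB image inline limit and uploads immediately; a 30 MB image passes the type check but trips the 20 MB general threshold; a 5 MB image stays inline. A simplified sketch of just this decision (prefer_upload, caching, and missing constraints omitted):

GEMINI_IMAGE_LIMIT = 100 * 1024 * 1024  # ImageConstraints.max_size_bytes
GEMINI_THRESHOLD = 20 * 1024 * 1024     # file_upload_threshold_bytes

def should_upload(file_size: int) -> bool:
    # Type-specific inline limit is checked first...
    if file_size > GEMINI_IMAGE_LIMIT:
        return True
    # ...then the general upload threshold decides.
    return file_size > GEMINI_THRESHOLD

assert should_upload(150 * 1024 * 1024)    # over the image inline limit
assert should_upload(30 * 1024 * 1024)     # over the general threshold
assert not should_upload(5 * 1024 * 1024)  # small enough to inline
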
@@ -239,8 +285,8 @@ class FileResolver:
             file_uri=result.file_uri,
         )

+    @staticmethod
     def _upload_with_retry(
-        self,
         uploader: FileUploader,
         file: FileInput,
         provider: str,
@@ -312,13 +358,14 @@ class FileResolver:
         """Resolve a file as inline content.

         Args:
-            file: The file to resolve.
+            file: The file to resolve (used for logging).
             provider: Provider name.
             context: Pre-computed file context.

         Returns:
             InlineBase64 or InlineBytes depending on provider.
         """
+        logger.debug(f"Resolving {file.filename} as inline for {provider}")
         if self.config.use_bytes_for_bedrock and "bedrock" in provider:
             return InlineBytes(
                 content_type=context.content_type,
@@ -374,21 +421,24 @@ class FileResolver:
         """
         semaphore = asyncio.Semaphore(max_concurrency)

-        async def resolve_one(name: str, file: FileInput) -> tuple[str, ResolvedFile]:
+        async def resolve_single(
+            entry_key: str, input_file: FileInput
+        ) -> tuple[str, ResolvedFile]:
+            """Resolve a single file with semaphore limiting."""
             async with semaphore:
-                resolved = await self.aresolve(file, provider)
-                return name, resolved
+                entry_resolved = await self.aresolve(input_file, provider)
+                return entry_key, entry_resolved

-        tasks = [resolve_one(n, f) for n, f in files.items()]
-        results = await asyncio.gather(*tasks, return_exceptions=True)
+        tasks = [resolve_single(n, f) for n, f in files.items()]
+        gather_results = await asyncio.gather(*tasks, return_exceptions=True)

         output: dict[str, ResolvedFile] = {}
-        for result in results:
-            if isinstance(result, BaseException):
-                logger.error(f"Resolution failed: {result}")
+        for item in gather_results:
+            if isinstance(item, BaseException):
+                logger.error(f"Resolution failed: {item}")
                 continue
-            name, resolved = result
-            output[name] = resolved
+            key, resolved = item
+            output[key] = resolved

         return output
@@ -451,8 +501,8 @@ class FileResolver:
             file_uri=result.file_uri,
         )

+    @staticmethod
     async def _aupload_with_retry(
-        self,
         uploader: FileUploader,
         file: FileInput,
         provider: str,
@@ -559,17 +609,24 @@ def create_resolver(
     """Create a configured FileResolver.

     Args:
-        provider: Optional provider name for provider-specific configuration.
+        provider: Optional provider name to load default threshold from constraints.
         prefer_upload: Whether to prefer upload over inline.
-        upload_threshold_bytes: Size threshold for using upload.
+        upload_threshold_bytes: Size threshold for using upload. If None and
+            provider is specified, uses provider's default threshold.
        enable_cache: Whether to enable upload caching.

     Returns:
         Configured FileResolver instance.
     """
+    threshold = upload_threshold_bytes
+    if threshold is None and provider is not None:
+        constraints = get_constraints_for_provider(provider)
+        if constraints is not None:
+            threshold = constraints.file_upload_threshold_bytes
+
     config = FileResolverConfig(
         prefer_upload=prefer_upload,
-        upload_threshold_bytes=upload_threshold_bytes,
+        upload_threshold_bytes=threshold,
     )
     cache = UploadCache() if enable_cache else None
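
The effective precedence is: explicit argument, then the provider's constraint default, then no threshold at all. For instance (a sketch using only the signature documented above; the Gemini default comes from its constraints table):

# No explicit threshold: picks up Gemini's 20 MB default from its constraints.
resolver = create_resolver(provider="gemini")

# An explicit value always wins over the provider default.
small = create_resolver(provider="gemini", upload_threshold_bytes=1024)

# Neither provider nor threshold given: the threshold stays None.
plain = create_resolver()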

View File

@@ -28,7 +28,6 @@ if TYPE_CHECKING:
     FileInput = AudioFile | File | ImageFile | PDFFile | TextFile | VideoFile

 logger = logging.getLogger(__name__)
-
 DEFAULT_TTL_SECONDS = 24 * 60 * 60  # 24 hours
@@ -139,7 +138,6 @@ class UploadCache:
             )
         else:
             self._cache = Cache(
-                Cache.MEMORY,
                 serializer=PickleSerializer(),
                 namespace=namespace,
             )
@@ -406,7 +404,8 @@ class UploadCache:
             results.append(cached)
         return results

-    def _run_sync(self, coro: Any) -> Any:
+    @staticmethod
+    def _run_sync(coro: Any) -> Any:
         """Run an async coroutine from sync context without blocking event loop."""
         try:
             loop = asyncio.get_running_loop()
@@ -549,7 +548,7 @@ def _cleanup_on_exit() -> None:
     from crewai.files.cleanup import cleanup_uploaded_files

     try:
-        cleanup_uploaded_files(_default_cache, delete_from_provider=True)
+        cleanup_uploaded_files(_default_cache)
     except Exception as e:
         logger.debug(f"Error during exit cleanup: {e}")

View File

@@ -204,7 +204,8 @@ class BedrockFileUploader(FileUploader):
         """
         return f"s3://{self.bucket_name}/{key}"

-    def _get_transfer_config(self) -> Any:
+    @staticmethod
+    def _get_transfer_config() -> Any:
         """Get boto3 TransferConfig for multipart uploads."""
         from boto3.s3.transfer import TransferConfig
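
For context, boto3's TransferConfig controls when S3 uploads switch to multipart transfers and how many threads are used. A representative configuration (values illustrative only, not the ones this method actually returns):

from boto3.s3.transfer import TransferConfig

config = TransferConfig(
    multipart_threshold=8 * 1024 * 1024,  # use multipart above 8 MB
    multipart_chunksize=8 * 1024 * 1024,  # size of each uploaded part
    max_concurrency=10,                   # parallel upload threads
    use_threads=True,
)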

View File

@@ -388,7 +388,8 @@ class OpenAIFileUploader(FileUploader):
                 logger.debug(f"Failed to cancel upload: {cancel_err}")
             raise

-    def _classify_error(self, e: Exception, filename: str | None) -> Exception:
+    @staticmethod
+    def _classify_error(e: Exception, filename: str | None) -> Exception:
         """Classify an exception as transient or permanent.

         Args: