mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-24 15:48:23 +00:00
- add input_files parameter to Crew.kickoff(), Flow.kickoff(), Task, and Agent.kickoff() - add provider-specific file uploaders for OpenAI, Anthropic, Gemini, and Bedrock - add file type detection, constraint validation, and automatic format conversion - add URL file source support for multimodal content - add streaming uploads for large files - add prompt caching support for Anthropic - add OpenAI Responses API support
378 lines
9.7 KiB
Python
378 lines
9.7 KiB
Python
"""Provider-specific file constraints for multimodal content."""
|
|
|
|
from dataclasses import dataclass
|
|
from functools import lru_cache
|
|
from typing import Literal
|
|
|
|
from crewai_files.core.types import (
|
|
AudioMimeType,
|
|
ImageMimeType,
|
|
TextContentType,
|
|
VideoMimeType,
|
|
)
|
|
|
|
|
|
# Identifiers that may appear as ``ProviderConstraints.name``.
# "openai_responses" is included because OPENAI_RESPONSES_CONSTRAINTS is built
# with that name and the provider lookup table carries a key for it; leaving it
# out makes that constant fail static type checking.
ProviderName = Literal[
    "anthropic",
    "openai",
    "gemini",
    "bedrock",
    "azure",
    "openai_responses",
]
|
|
|
|
# Image MIME types used when a provider does not declare its own list.
DEFAULT_IMAGE_FORMATS: tuple[ImageMimeType, ...] = (
    "image/png",
    "image/jpeg",
    "image/gif",
    "image/webp",
)

# Gemini's image list: the defaults followed by HEIC/HEIF.
GEMINI_IMAGE_FORMATS: tuple[ImageMimeType, ...] = (
    *DEFAULT_IMAGE_FORMATS,
    "image/heic",
    "image/heif",
)
|
|
|
|
# Audio MIME types used when a provider does not declare its own list.
DEFAULT_AUDIO_FORMATS: tuple[AudioMimeType, ...] = (
    "audio/mp3",
    "audio/mpeg",
    "audio/wav",
    "audio/ogg",
    "audio/flac",
    "audio/aac",
    "audio/m4a",
)

# Gemini's audio list: the defaults followed by Opus.
GEMINI_AUDIO_FORMATS: tuple[AudioMimeType, ...] = (
    *DEFAULT_AUDIO_FORMATS,
    "audio/opus",
)
|
|
|
|
# Video MIME types used when a provider does not declare its own list.
DEFAULT_VIDEO_FORMATS: tuple[VideoMimeType, ...] = (
    "video/mp4",
    "video/mpeg",
    "video/webm",
    "video/quicktime",
)

# Gemini's video list: the defaults followed by AVI and FLV.
GEMINI_VIDEO_FORMATS: tuple[VideoMimeType, ...] = (
    *DEFAULT_VIDEO_FORMATS,
    "video/x-msvideo",
    "video/x-flv",
)
|
|
|
|
# Text content types used when a provider does not declare its own list.
DEFAULT_TEXT_FORMATS: tuple[TextContentType, ...] = (
    "text/plain",
    "text/markdown",
    "text/csv",
    "application/json",
    "text/xml",
    "text/html",
)

# Gemini's text list. Spelled out in full because the extra XML/YAML entries
# are interleaved with the defaults rather than appended to them.
GEMINI_TEXT_FORMATS: tuple[TextContentType, ...] = (
    "text/plain",
    "text/markdown",
    "text/csv",
    "application/json",
    "application/xml",
    "text/xml",
    "application/x-yaml",
    "text/yaml",
    "text/html",
)
|
|
|
|
|
|
@dataclass(frozen=True)
class ImageConstraints:
    """Limits a provider places on image input.

    Attributes:
        max_size_bytes: Largest accepted image file, in bytes.
        max_width: Largest accepted width in pixels (defaults to ``None``).
        max_height: Largest accepted height in pixels (defaults to ``None``).
        max_images_per_request: Cap on images in one request
            (defaults to ``None``).
        supported_formats: Image MIME types the provider accepts.
    """

    max_size_bytes: int
    max_width: int | None = None
    max_height: int | None = None
    max_images_per_request: int | None = None
    supported_formats: tuple[ImageMimeType, ...] = DEFAULT_IMAGE_FORMATS
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class PDFConstraints:
|
|
"""Constraints for PDF files.
|
|
|
|
Attributes:
|
|
max_size_bytes: Maximum file size in bytes.
|
|
max_pages: Maximum number of pages.
|
|
"""
|
|
|
|
max_size_bytes: int
|
|
max_pages: int | None = None
|
|
|
|
|
|
@dataclass(frozen=True)
class AudioConstraints:
    """Limits a provider places on audio input.

    Attributes:
        max_size_bytes: Largest accepted audio file, in bytes.
        max_duration_seconds: Longest accepted clip, in seconds
            (defaults to ``None``).
        supported_formats: Audio MIME types the provider accepts.
    """

    max_size_bytes: int
    max_duration_seconds: int | None = None
    supported_formats: tuple[AudioMimeType, ...] = DEFAULT_AUDIO_FORMATS
|
|
|
|
|
|
@dataclass(frozen=True)
class VideoConstraints:
    """Limits a provider places on video input.

    Attributes:
        max_size_bytes: Largest accepted video file, in bytes.
        max_duration_seconds: Longest accepted video, in seconds
            (defaults to ``None``).
        supported_formats: Video MIME types the provider accepts.
    """

    max_size_bytes: int
    max_duration_seconds: int | None = None
    supported_formats: tuple[VideoMimeType, ...] = DEFAULT_VIDEO_FORMATS
|
|
|
|
|
|
@dataclass(frozen=True)
class TextConstraints:
    """Limits a provider places on text input.

    Attributes:
        max_size_bytes: Largest accepted text file, in bytes.
        supported_formats: Text content types the provider accepts.
    """

    max_size_bytes: int
    supported_formats: tuple[TextContentType, ...] = DEFAULT_TEXT_FORMATS
|
|
|
|
|
|
@dataclass(frozen=True)
class ProviderConstraints:
    """Bundle of every per-media-type limit for a single provider.

    A media-type field left at ``None`` is how
    ``get_supported_content_types`` decides that type is not offered.

    Attributes:
        name: Identifier of the provider.
        image: Limits for image files.
        pdf: Limits for PDF files.
        audio: Limits for audio files.
        video: Limits for video files.
        text: Limits for text files.
        general_max_size_bytes: Size cap that applies to any file type.
        supports_file_upload: True when the provider offers a file upload API.
        file_upload_threshold_bytes: File size above which the upload API
            should be used.
        supports_url_references: True when files may be referenced by URL.
    """

    name: ProviderName

    # Per-media-type limits.
    image: ImageConstraints | None = None
    pdf: PDFConstraints | None = None
    audio: AudioConstraints | None = None
    video: VideoConstraints | None = None
    text: TextConstraints | None = None

    # Provider-wide settings.
    general_max_size_bytes: int | None = None
    supports_file_upload: bool = False
    file_upload_threshold_bytes: int | None = None
    supports_url_references: bool = False
|
|
|
|
|
|
# Limits for Anthropic: images and PDFs only.
ANTHROPIC_CONSTRAINTS = ProviderConstraints(
    name="anthropic",
    image=ImageConstraints(
        max_size_bytes=5 * 1024 * 1024,  # 5 MB per image
        max_width=8000,
        max_height=8000,
        max_images_per_request=100,
    ),
    pdf=PDFConstraints(
        max_size_bytes=32 * 1024 * 1024,  # 32 MB request size limit
        max_pages=100,
    ),
    supports_file_upload=True,
    file_upload_threshold_bytes=5 * 1024 * 1024,  # 5 MB
    supports_url_references=True,
)
|
|
|
|
# Limits for OpenAI when used through the completions-style API: images only.
OPENAI_COMPLETIONS_CONSTRAINTS = ProviderConstraints(
    name="openai",
    image=ImageConstraints(
        max_size_bytes=20 * 1024 * 1024,  # 20 MB
        max_images_per_request=10,
    ),
    supports_file_upload=True,
    file_upload_threshold_bytes=5 * 1024 * 1024,  # 5 MB
    supports_url_references=True,
)

# Limits for the OpenAI Responses API: images, PDFs and audio.
OPENAI_RESPONSES_CONSTRAINTS = ProviderConstraints(
    name="openai_responses",
    image=ImageConstraints(
        max_size_bytes=20 * 1024 * 1024,  # 20 MB
        max_images_per_request=10,
    ),
    pdf=PDFConstraints(
        max_size_bytes=32 * 1024 * 1024,  # 32 MB total across all file inputs
        max_pages=100,
    ),
    audio=AudioConstraints(
        max_size_bytes=25 * 1024 * 1024,  # 25 MB - whisper limit
        # 25 minutes, arbitrary-ish, this is from the transcriptions limit
        max_duration_seconds=25 * 60,
    ),
    supports_file_upload=True,
    file_upload_threshold_bytes=5 * 1024 * 1024,  # 5 MB
    supports_url_references=True,
)

# Plain "openai" resolves to the completions limits.
OPENAI_CONSTRAINTS = OPENAI_COMPLETIONS_CONSTRAINTS
|
|
|
|
# Limits for Gemini: the only provider here that declares all five media types.
GEMINI_CONSTRAINTS = ProviderConstraints(
    name="gemini",
    image=ImageConstraints(
        max_size_bytes=100 * 1024 * 1024,  # 100 MB
        supported_formats=GEMINI_IMAGE_FORMATS,
    ),
    pdf=PDFConstraints(
        max_size_bytes=50 * 1024 * 1024,  # 50 MB
    ),
    audio=AudioConstraints(
        max_size_bytes=100 * 1024 * 1024,  # 100 MB
        max_duration_seconds=34_200,  # 9.5 hours
        supported_formats=GEMINI_AUDIO_FORMATS,
    ),
    video=VideoConstraints(
        max_size_bytes=2 * 1024 * 1024 * 1024,  # 2 GB
        max_duration_seconds=60 * 60,  # 1 hour at default resolution
        supported_formats=GEMINI_VIDEO_FORMATS,
    ),
    text=TextConstraints(
        max_size_bytes=100 * 1024 * 1024,  # 100 MB
        supported_formats=GEMINI_TEXT_FORMATS,
    ),
    supports_file_upload=True,
    file_upload_threshold_bytes=20 * 1024 * 1024,  # 20 MB
    supports_url_references=True,
)
|
|
|
|
# Limits for Bedrock: images and PDFs, no file upload API declared.
# NOTE(review): AWS's Converse API documentation appears to list 3.75 MB for
# images and 4.5 MB for documents, which is the reverse of the two sizes
# below - confirm against the current AWS docs before relying on them.
BEDROCK_CONSTRAINTS = ProviderConstraints(
    name="bedrock",
    image=ImageConstraints(
        max_size_bytes=4500 * 1024,  # 4_608_000 bytes
        max_width=8000,
        max_height=8000,
    ),
    pdf=PDFConstraints(
        max_size_bytes=3750 * 1024,  # 3_840_000 bytes
        max_pages=100,
    ),
    supports_url_references=True,  # S3 URIs supported
)
|
|
|
|
# Limits for Azure: images and audio, no file upload API declared.
AZURE_CONSTRAINTS = ProviderConstraints(
    name="azure",
    image=ImageConstraints(
        max_size_bytes=20 * 1024 * 1024,  # 20 MB
        max_images_per_request=10,
    ),
    audio=AudioConstraints(
        max_size_bytes=25 * 1024 * 1024,  # 25 MB - same as openai
        max_duration_seconds=25 * 60,  # 25 minutes - same as openai
    ),
    supports_url_references=True,
)
|
|
|
|
|
|
# Lookup table from lowercase provider key to its constraints. It is consulted
# by get_constraints_for_provider for exact matches and, failing that, for
# substring matches against the requested provider string.
_PROVIDER_CONSTRAINTS_MAP: dict[str, ProviderConstraints] = {
    # Canonical provider names.
    "anthropic": ANTHROPIC_CONSTRAINTS,
    "openai": OPENAI_CONSTRAINTS,
    "openai_responses": OPENAI_RESPONSES_CONSTRAINTS,
    "gemini": GEMINI_CONSTRAINTS,
    "bedrock": BEDROCK_CONSTRAINTS,
    "azure": AZURE_CONSTRAINTS,
    # Aliases pointing at the same constraint objects.
    "claude": ANTHROPIC_CONSTRAINTS,
    "gpt": OPENAI_CONSTRAINTS,
    "google": GEMINI_CONSTRAINTS,
    "aws": BEDROCK_CONSTRAINTS,
}
|
|
|
|
|
|
@lru_cache(maxsize=32)
def get_constraints_for_provider(
    provider: str | ProviderConstraints,
) -> ProviderConstraints | None:
    """Resolve a provider name to its constraints.

    A ``ProviderConstraints`` instance is returned unchanged. A string is
    lowercased and looked up exactly first. If that fails, every known key
    that occurs inside the string is considered and the one occurring
    earliest wins; when two keys start at the same position the longer one
    wins. This lets compound identifiers such as ``"azure_openai"`` or
    ``"bedrock/anthropic.claude-3"`` resolve to the provider named first
    rather than to whichever key happens to come first in the lookup table.

    Results are memoised with ``lru_cache``, so the argument must be hashable.

    Args:
        provider: Provider name string or ProviderConstraints instance.

    Returns:
        ProviderConstraints for the provider, or None if no key matches.
    """
    if isinstance(provider, ProviderConstraints):
        return provider

    provider_lower = provider.lower()

    exact = _PROVIDER_CONSTRAINTS_MAP.get(provider_lower)
    if exact is not None:
        return exact

    # Rank substring hits by (position, -length): the earliest occurrence is
    # taken as the provider prefix, and the more specific key breaks ties.
    candidates = [
        (provider_lower.find(key), -len(key), key)
        for key in _PROVIDER_CONSTRAINTS_MAP
        if key in provider_lower
    ]
    if not candidates:
        return None

    *_, best_key = min(candidates)
    return _PROVIDER_CONSTRAINTS_MAP[best_key]
|
|
|
|
|
|
def get_supported_content_types(provider: str, api: str | None = None) -> list[str]:
    """List the MIME type prefixes a provider accepts.

    The provider is resolved through ``get_constraints_for_provider``. When
    ``api`` is ``"responses"`` and the provider string contains ``"openai"``,
    the ``"openai_responses"`` entry is looked up instead.

    Args:
        provider: Provider name string.
        api: Optional API variant (e.g., "responses" for OpenAI Responses API).

    Returns:
        Prefixes in the fixed order image, PDF, audio, video, text, keeping
        only the media types the provider declares constraints for
        (e.g., ["image/", "application/pdf"]). Empty if the provider is
        unknown.
    """
    use_responses = api == "responses" and "openai" in provider.lower()
    constraints = get_constraints_for_provider(
        "openai_responses" if use_responses else provider
    )
    if not constraints:
        return []

    # Each media-type slot paired with the prefix it contributes when set.
    slot_prefixes = (
        (constraints.image, "image/"),
        (constraints.pdf, "application/pdf"),
        (constraints.audio, "audio/"),
        (constraints.video, "video/"),
        (constraints.text, "text/"),
    )
    return [prefix for slot, prefix in slot_prefixes if slot]
|