refactor: centralize multimodal formatting in crewai_files

This commit is contained in:
Greyson LaLonde
2026-01-22 15:59:55 -05:00
parent b95a3a9bc8
commit ca07114bcf
18 changed files with 742 additions and 1002 deletions

View File

@@ -1,10 +1,10 @@
[project]
name = "crewai-files"
version = "0.1.0"
dynamic = ["version"]
description = "Add your description here"
readme = "README.md"
authors = [
{ name = "Greyson LaLonde", email = "greyson.r.lalonde@gmail.com" }
{ name = "Greyson LaLonde", email = "greyson@crewai.com" }
]
requires-python = ">=3.10, <3.14"
dependencies = [
@@ -17,9 +17,9 @@ dependencies = [
"av~=13.0.0",
]
[project.scripts]
crewai-files = "crewai_files:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.version]
path = "src/crewai_files/__init__.py"

View File

@@ -49,6 +49,10 @@ from crewai_files.core.types import (
VideoFile,
VideoMimeType,
)
from crewai_files.formatting import (
aformat_multimodal_content,
format_multimodal_content,
)
from crewai_files.processing import (
ANTHROPIC_CONSTRAINTS,
BEDROCK_CONSTRAINTS,
@@ -132,10 +136,12 @@ __all__ = [
"VideoExtension",
"VideoFile",
"VideoMimeType",
"aformat_multimodal_content",
"cleanup_expired_files",
"cleanup_provider_files",
"cleanup_uploaded_files",
"create_resolver",
"format_multimodal_content",
"get_constraints_for_provider",
"get_upload_cache",
"get_uploader",
@@ -143,3 +149,5 @@ __all__ = [
"reset_upload_cache",
"wrap_file_source",
]
__version__ = "1.8.1"

View File

@@ -0,0 +1,12 @@
"""High-level formatting API for multimodal content."""
from crewai_files.formatting.api import (
aformat_multimodal_content,
format_multimodal_content,
)
# Public names of this package; both are re-exported from
# crewai_files.formatting.api (sync and async entry points).
__all__ = [
    "aformat_multimodal_content",
    "format_multimodal_content",
]

View File

@@ -0,0 +1,91 @@
"""Anthropic content block formatter."""
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
UrlReference,
)
from crewai_files.core.types import FileInput
class AnthropicFormatter:
    """Build Anthropic content blocks from resolved files."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into an Anthropic content block.

        Args:
            file: Original file input; supplies the content type and, as a
                last resort, the raw bytes via ``read()``.
            resolved: The resolved form of the file (file reference, URL
                reference, inline base64, or anything else).

        Returns:
            A ``{"type": ..., "source": ...}`` dict, or None when the
            content type is neither an image nor a PDF.
        """
        mime = file.content_type
        kind = self._get_block_type(mime)
        if kind is None:
            return None

        source: dict[str, Any]
        if isinstance(resolved, FileReference):
            source = {"type": "file", "file_id": resolved.file_id}
        elif isinstance(resolved, UrlReference):
            source = {"type": "url", "url": resolved.url}
        elif isinstance(resolved, InlineBase64):
            source = {
                "type": "base64",
                "media_type": resolved.content_type,
                "data": resolved.data,
            }
        else:
            # Any other resolution: encode the original bytes ourselves.
            encoded = base64.b64encode(file.read()).decode("ascii")
            source = {
                "type": "base64",
                "media_type": mime,
                "data": encoded,
            }

        return {"type": kind, "source": source}

    @staticmethod
    def _get_block_type(content_type: str) -> str | None:
        """Map a MIME type to the Anthropic block type.

        Args:
            content_type: MIME type of the file.

        Returns:
            "document" for PDFs, "image" for any ``image/*`` type,
            otherwise None.
        """
        if content_type == "application/pdf":
            return "document"
        return "image" if content_type.startswith("image/") else None

View File

@@ -0,0 +1,277 @@
"""High-level API for formatting multimodal content."""
from __future__ import annotations
import os
from typing import Any
from crewai_files.cache.upload_cache import get_upload_cache
from crewai_files.core.types import FileInput
from crewai_files.formatting.anthropic import AnthropicFormatter
from crewai_files.formatting.bedrock import BedrockFormatter
from crewai_files.formatting.gemini import GeminiFormatter
from crewai_files.formatting.openai import OpenAIFormatter
from crewai_files.processing.constraints import get_constraints_for_provider
from crewai_files.processing.processor import FileProcessor
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
from crewai_files.uploaders.factory import ProviderType
def _normalize_provider(provider: str | None) -> ProviderType:
"""Normalize provider string to ProviderType.
Args:
provider: Raw provider string.
Returns:
Normalized provider type.
Raises:
ValueError: If provider is None or empty.
"""
if not provider:
raise ValueError("provider is required")
provider_lower = provider.lower()
if "gemini" in provider_lower:
return "gemini"
if "google" in provider_lower:
return "google"
if "anthropic" in provider_lower:
return "anthropic"
if "claude" in provider_lower:
return "claude"
if "bedrock" in provider_lower:
return "bedrock"
if "aws" in provider_lower:
return "aws"
if "azure" in provider_lower:
return "azure"
if "gpt" in provider_lower:
return "gpt"
return "openai"
def format_multimodal_content(
    files: dict[str, FileInput],
    provider: str | None = None,
) -> list[dict[str, Any]]:
    """Convert files into content blocks for the given provider.

    The pipeline is: process the files with a FileProcessor built for the
    provider, keep only those whose content type the provider's constraints
    allow, resolve each one through a FileResolver, then hand the result to
    the provider-specific formatter. Files the formatter returns None for
    are left out.

    Args:
        files: Dictionary mapping file names to FileInput objects.
        provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").

    Returns:
        List of content blocks in the provider's expected format. Empty if
        there are no files or none survive processing/filtering.

    Raises:
        ValueError: If files are given but provider is None or empty.

    Example:
        >>> from crewai_files import format_multimodal_content, ImageFile
        >>> files = {"photo": ImageFile(source="image.jpg")}
        >>> blocks = format_multimodal_content(files, "openai")
    """
    if not files:
        return []

    provider_type = _normalize_provider(provider)

    processed = FileProcessor(constraints=provider_type).process_files(files)
    if not processed:
        return []

    allowed_prefixes = _get_supported_types(
        get_constraints_for_provider(provider_type)
    )
    supported = _filter_supported_files(processed, allowed_prefixes)
    if not supported:
        return []

    resolver = FileResolver(
        config=_get_resolver_config(provider_type),
        upload_cache=get_upload_cache(),
    )
    formatter = _get_formatter(provider_type)

    # Lazily resolve-then-format each file in turn, in input order.
    candidates = (
        _format_block(
            formatter,
            file_input,
            resolver.resolve(file_input, provider_type),
            name,
        )
        for name, file_input in supported.items()
    )
    return [block for block in candidates if block is not None]
async def aformat_multimodal_content(
    files: dict[str, FileInput],
    provider: str | None = None,
) -> list[dict[str, Any]]:
    """Async counterpart of format_multimodal_content.

    Follows the same process / filter / resolve / format pipeline, but uses
    the processor's and resolver's async batch calls (``aprocess_files`` and
    ``aresolve_files``). Blocks are emitted in the order the resolver
    returns its results.

    Args:
        files: Dictionary mapping file names to FileInput objects.
        provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").

    Returns:
        List of content blocks in the provider's expected format. Empty if
        there are no files or none survive processing/filtering.

    Raises:
        ValueError: If files are given but provider is None or empty.
    """
    if not files:
        return []

    provider_type = _normalize_provider(provider)

    processed = await FileProcessor(constraints=provider_type).aprocess_files(files)
    if not processed:
        return []

    allowed_prefixes = _get_supported_types(
        get_constraints_for_provider(provider_type)
    )
    supported = _filter_supported_files(processed, allowed_prefixes)
    if not supported:
        return []

    resolver = FileResolver(
        config=_get_resolver_config(provider_type),
        upload_cache=get_upload_cache(),
    )
    resolved_by_name = await resolver.aresolve_files(supported, provider_type)

    formatter = _get_formatter(provider_type)
    candidates = (
        _format_block(formatter, supported[name], resolved, name)
        for name, resolved in resolved_by_name.items()
    )
    return [block for block in candidates if block is not None]
def _get_supported_types(
constraints: Any | None,
) -> list[str]:
"""Get list of supported MIME type prefixes from constraints.
Args:
constraints: Provider constraints.
Returns:
List of MIME type prefixes (e.g., ["image/", "application/pdf"]).
"""
if constraints is None:
return []
supported: list[str] = []
if constraints.image is not None:
supported.append("image/")
if constraints.pdf is not None:
supported.append("application/pdf")
if constraints.audio is not None:
supported.append("audio/")
if constraints.video is not None:
supported.append("video/")
return supported
def _filter_supported_files(
files: dict[str, FileInput],
supported_types: list[str],
) -> dict[str, FileInput]:
"""Filter files to those with supported content types.
Args:
files: All files.
supported_types: MIME type prefixes to allow.
Returns:
Filtered dictionary of supported files.
"""
return {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
def _get_resolver_config(provider_lower: str) -> FileResolverConfig:
    """Get resolver config for provider.

    Bedrock gets a dedicated configuration: raw bytes are requested from
    the resolver, and uploading is preferred only when the
    ``CREWAI_BEDROCK_S3_BUCKET`` environment variable is set to a non-empty
    value. Every other provider resolves without preferring uploads.

    Args:
        provider_lower: Lowercase provider name.

    Returns:
        Configured FileResolverConfig.
    """
    # Match both Bedrock aliases. _normalize_provider can return "aws" and
    # _get_formatter treats "aws" as Bedrock, so checking only "bedrock"
    # here would pair the Bedrock formatter with the generic resolver config.
    if "bedrock" in provider_lower or "aws" in provider_lower:
        upload_to_s3 = bool(os.environ.get("CREWAI_BEDROCK_S3_BUCKET"))
        return FileResolverConfig(
            prefer_upload=upload_to_s3, use_bytes_for_bedrock=True
        )

    return FileResolverConfig(prefer_upload=False)
def _get_formatter(
    provider_lower: str,
) -> OpenAIFormatter | AnthropicFormatter | BedrockFormatter | GeminiFormatter:
    """Pick the formatter that matches the provider name.

    Checks are made in Anthropic, Bedrock, Gemini order; anything else
    gets the OpenAI formatter. The Bedrock formatter is given the value of
    the ``CREWAI_BEDROCK_S3_BUCKET_OWNER`` environment variable (or None).

    Args:
        provider_lower: Lowercase provider name.

    Returns:
        Provider-specific formatter instance.
    """

    def mentions(*aliases: str) -> bool:
        # True when any alias occurs as a substring of the provider name.
        return any(alias in provider_lower for alias in aliases)

    if mentions("anthropic", "claude"):
        return AnthropicFormatter()

    if mentions("bedrock", "aws"):
        return BedrockFormatter(
            s3_bucket_owner=os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
        )

    if mentions("gemini", "google"):
        return GeminiFormatter()

    return OpenAIFormatter()
def _format_block(
    formatter: OpenAIFormatter
    | AnthropicFormatter
    | BedrockFormatter
    | GeminiFormatter,
    file_input: FileInput,
    resolved: Any,
    name: str,
) -> dict[str, Any] | None:
    """Dispatch one file to the formatter's ``format_block``.

    Only the Bedrock formatter receives the file name; the other
    formatters are called with the file input and resolved file alone.

    Args:
        formatter: Provider formatter.
        file_input: Original file input.
        resolved: Resolved file.
        name: File name.

    Returns:
        Whatever the formatter returns: a content block dict or None.
    """
    if not isinstance(formatter, BedrockFormatter):
        return formatter.format_block(file_input, resolved)

    return formatter.format_block(file_input, resolved, name=name)

View File

@@ -0,0 +1,28 @@
"""Base formatter protocol for provider-specific content blocks."""
from __future__ import annotations
from typing import Any, Protocol
from crewai_files.core.resolved import ResolvedFile
from crewai_files.core.types import FileInput
class ContentFormatter(Protocol):
    """Structural interface for provider content-block formatters.

    Any object with a compatible ``format_block`` method satisfies this
    protocol; no inheritance is required.
    """

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Produce the provider-specific content block for one file.

        Args:
            file: Original file input with metadata.
            resolved: Resolved form of the file (FileReference,
                InlineBase64, etc.).

        Returns:
            The content block as a dict, or None if the file type is not
            supported by the provider.
        """
        ...

View File

@@ -0,0 +1,188 @@
"""Bedrock content block formatter."""
from __future__ import annotations
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBytes,
ResolvedFile,
)
from crewai_files.core.types import FileInput
# MIME type -> value used for the "format" field of a Bedrock document block.
_DOCUMENT_FORMATS: dict[str, str] = {
    "application/pdf": "pdf",
    "text/csv": "csv",
    "text/plain": "txt",
    "text/markdown": "md",
    "text/html": "html",
    "application/msword": "doc",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    "application/vnd.ms-excel": "xls",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
}

# MIME type -> value used for the "format" field of a Bedrock video block.
_VIDEO_FORMATS: dict[str, str] = {
    "video/mp4": "mp4",
    "video/quicktime": "mov",
    "video/x-matroska": "mkv",
    "video/webm": "webm",
    "video/x-flv": "flv",
    "video/mpeg": "mpeg",
    "video/3gpp": "three_gp",
}


class BedrockFormatter:
    """Formats resolved files into Bedrock Converse API content blocks."""

    def __init__(self, s3_bucket_owner: str | None = None) -> None:
        """Initialize formatter.

        Args:
            s3_bucket_owner: Optional S3 bucket owner; when set it is added
                as ``bucketOwner`` to every S3 location emitted.
        """
        self.s3_bucket_owner = s3_bucket_owner

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
        name: str | None = None,
    ) -> dict[str, Any] | None:
        """Format a resolved file into a Bedrock content block.

        A FileReference carrying a ``file_uri`` becomes an S3-location
        block. Otherwise the block embeds raw bytes, taken from an
        InlineBytes resolution if available or read from ``file``.

        Args:
            file: Original file input with metadata.
            resolved: Resolved file.
            name: File name, used only for document blocks. It is reduced
                to the characters Bedrock accepts; "document" is used when
                nothing usable remains.

        Returns:
            Content block dict or None if the content type is not supported.
        """
        content_type = file.content_type

        if isinstance(resolved, FileReference) and resolved.file_uri:
            return self._format_s3_block(content_type, resolved.file_uri, name)

        if isinstance(resolved, InlineBytes):
            file_bytes = resolved.data
        else:
            file_bytes = file.read()

        return self._format_bytes_block(content_type, file_bytes, name)

    def _format_s3_block(
        self,
        content_type: str,
        file_uri: str,
        name: str | None,
    ) -> dict[str, Any] | None:
        """Format block with S3 location source.

        Args:
            content_type: MIME type.
            file_uri: S3 URI.
            name: File name for documents.

        Returns:
            Content block dict or None if the content type is not supported.
        """
        s3_location: dict[str, Any] = {"uri": file_uri}
        if self.s3_bucket_owner:
            s3_location["bucketOwner"] = self.s3_bucket_owner

        return self._build_block(content_type, {"s3Location": s3_location}, name)

    def _format_bytes_block(
        self,
        content_type: str,
        file_bytes: bytes,
        name: str | None,
    ) -> dict[str, Any] | None:
        """Format block with inline bytes source.

        Args:
            content_type: MIME type.
            file_bytes: Raw file bytes.
            name: File name for documents.

        Returns:
            Content block dict or None if the content type is not supported.
        """
        return self._build_block(content_type, {"bytes": file_bytes}, name)

    def _build_block(
        self,
        content_type: str,
        source: dict[str, Any],
        name: str | None,
    ) -> dict[str, Any] | None:
        """Wrap a ready-made source in the image/video/document envelope.

        Shared by the S3 and inline-bytes paths so both apply the same
        content-type dispatch.

        Args:
            content_type: MIME type.
            source: The ``source`` payload (S3 location or bytes).
            name: File name for documents.

        Returns:
            Content block dict, or None for an unknown video or document
            MIME type.
        """
        if content_type.startswith("image/"):
            return {
                "image": {
                    "format": self._get_image_format(content_type),
                    "source": source,
                }
            }

        if content_type.startswith("video/"):
            video_format = _VIDEO_FORMATS.get(content_type)
            if not video_format:
                return None
            return {"video": {"format": video_format, "source": source}}

        doc_format = _DOCUMENT_FORMATS.get(content_type)
        if not doc_format:
            return None
        return {
            "document": {
                "name": self._sanitize_document_name(name),
                "format": doc_format,
                "source": source,
            }
        }

    @staticmethod
    def _sanitize_document_name(name: str | None) -> str:
        """Reduce a file name to what Bedrock allows as a document name.

        Bedrock accepts only ASCII alphanumerics, single spaces, hyphens,
        parentheses and square brackets, up to 200 characters. Names such
        as "my_report.pdf" are otherwise rejected by the API.

        Args:
            name: Caller-supplied file name, possibly None or empty.

        Returns:
            The cleaned name, or "document" if nothing usable remains.
        """
        if not name:
            return "document"

        # Swap disallowed characters for spaces to keep word boundaries.
        kept = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "-()[]" else " "
            for ch in name
        )
        # Collapse whitespace runs, then enforce the length limit.
        cleaned = " ".join(kept.split())[:200].strip()
        return cleaned or "document"

    @staticmethod
    def _get_image_format(content_type: str) -> str:
        """Get Bedrock image format from content type.

        Args:
            content_type: MIME type.

        Returns:
            The MIME subtype, with "jpg" normalised to "jpeg".
        """
        media_type = content_type.split("/")[-1]
        if media_type == "jpg":
            return "jpeg"
        return media_type

View File

@@ -0,0 +1,66 @@
"""Gemini content block formatter."""
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
UrlReference,
)
from crewai_files.core.types import FileInput
class GeminiFormatter:
    """Build Gemini content blocks from resolved files."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into a Gemini content block.

        File references that carry a URI, and URL references, become
        ``fileData`` blocks. Everything else becomes ``inlineData``, using
        the resolver's base64 payload when there is one and otherwise
        encoding the bytes read from ``file``.

        Args:
            file: Original file input with metadata.
            resolved: Resolved file.

        Returns:
            Content block dict. This implementation always returns a block.
        """
        mime = file.content_type

        if isinstance(resolved, FileReference) and resolved.file_uri:
            return self._file_data(resolved.content_type, resolved.file_uri)

        if isinstance(resolved, UrlReference):
            return self._file_data(mime, resolved.url)

        if isinstance(resolved, InlineBase64):
            return self._inline_data(resolved.content_type, resolved.data)

        encoded = base64.b64encode(file.read()).decode("ascii")
        return self._inline_data(mime, encoded)

    @staticmethod
    def _file_data(mime_type: str, uri: str) -> dict[str, Any]:
        """Build a ``fileData`` block pointing at ``uri``."""
        return {"fileData": {"mimeType": mime_type, "fileUri": uri}}

    @staticmethod
    def _inline_data(mime_type: str, data: str) -> dict[str, Any]:
        """Build an ``inlineData`` block carrying base64 ``data``."""
        return {"inlineData": {"mimeType": mime_type, "data": data}}

View File

@@ -0,0 +1,60 @@
"""OpenAI content block formatter."""
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
UrlReference,
)
from crewai_files.core.types import FileInput
class OpenAIFormatter:
    """Build OpenAI content blocks from resolved files."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into an OpenAI content block.

        A file reference becomes a ``file`` block keyed by ``file_id``.
        Every other resolution is emitted as an ``image_url`` block,
        whatever the content type: a plain URL for URL references and a
        base64 ``data:`` URL otherwise.

        Args:
            file: Original file input with metadata.
            resolved: Resolved file.

        Returns:
            Content block dict. This implementation always returns a block.
        """
        mime = file.content_type

        if isinstance(resolved, FileReference):
            return {
                "type": "file",
                "file": {"file_id": resolved.file_id},
            }

        if isinstance(resolved, UrlReference):
            url = resolved.url
        elif isinstance(resolved, InlineBase64):
            url = f"data:{resolved.content_type};base64,{resolved.data}"
        else:
            # Nothing pre-encoded: read and encode the original bytes.
            encoded = base64.b64encode(file.read()).decode("ascii")
            url = f"data:{mime};base64,{encoded}"

        return {"type": "image_url", "image_url": {"url": url}}