refactor: centralize multimodal formatting in crewai_files

Greyson LaLonde
2026-01-22 15:59:55 -05:00
parent b95a3a9bc8
commit ca07114bcf
18 changed files with 742 additions and 1002 deletions

View File

@@ -1,10 +1,10 @@
[project]
name = "crewai-files"
version = "0.1.0"
dynamic = ["version"]
description = "Add your description here"
readme = "README.md"
authors = [
{ name = "Greyson LaLonde", email = "greyson.r.lalonde@gmail.com" }
{ name = "Greyson LaLonde", email = "greyson@crewai.com" }
]
requires-python = ">=3.10, <3.14"
dependencies = [
@@ -17,9 +17,9 @@ dependencies = [
"av~=13.0.0",
]
[project.scripts]
crewai-files = "crewai_files:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.version]
path = "src/crewai_files/__init__.py"

View File

@@ -49,6 +49,10 @@ from crewai_files.core.types import (
VideoFile,
VideoMimeType,
)
from crewai_files.formatting import (
aformat_multimodal_content,
format_multimodal_content,
)
from crewai_files.processing import (
ANTHROPIC_CONSTRAINTS,
BEDROCK_CONSTRAINTS,
@@ -132,10 +136,12 @@ __all__ = [
"VideoExtension",
"VideoFile",
"VideoMimeType",
"aformat_multimodal_content",
"cleanup_expired_files",
"cleanup_provider_files",
"cleanup_uploaded_files",
"create_resolver",
"format_multimodal_content",
"get_constraints_for_provider",
"get_upload_cache",
"get_uploader",
@@ -143,3 +149,5 @@ __all__ = [
"reset_upload_cache",
"wrap_file_source",
]
__version__ = "1.8.1"

View File

@@ -0,0 +1,12 @@
"""High-level formatting API for multimodal content."""
from crewai_files.formatting.api import (
aformat_multimodal_content,
format_multimodal_content,
)
__all__ = [
"aformat_multimodal_content",
"format_multimodal_content",
]

View File

@@ -0,0 +1,91 @@
"""Anthropic content block formatter."""
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
UrlReference,
)
from crewai_files.core.types import FileInput
class AnthropicFormatter:
"""Formats resolved files into Anthropic content blocks."""
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
) -> dict[str, Any] | None:
"""Format a resolved file into an Anthropic content block.
Args:
file: Original file input with metadata.
resolved: Resolved file.
Returns:
Content block dict or None if not supported.
"""
content_type = file.content_type
block_type = self._get_block_type(content_type)
if block_type is None:
return None
if isinstance(resolved, FileReference):
return {
"type": block_type,
"source": {
"type": "file",
"file_id": resolved.file_id,
},
}
if isinstance(resolved, UrlReference):
return {
"type": block_type,
"source": {
"type": "url",
"url": resolved.url,
},
}
if isinstance(resolved, InlineBase64):
return {
"type": block_type,
"source": {
"type": "base64",
"media_type": resolved.content_type,
"data": resolved.data,
},
}
data = base64.b64encode(file.read()).decode("ascii")
return {
"type": block_type,
"source": {
"type": "base64",
"media_type": content_type,
"data": data,
},
}
@staticmethod
def _get_block_type(content_type: str) -> str | None:
"""Get Anthropic block type for content type.
Args:
content_type: MIME type.
Returns:
Block type string or None if not supported.
"""
if content_type.startswith("image/"):
return "image"
if content_type == "application/pdf":
return "document"
return None
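
Illustrative only (not from the commit): a sketch of driving AnthropicFormatter directly. It assumes InlineBase64 accepts the keyword arguments implied by the attributes the formatter reads; in normal use format_multimodal_content builds the resolved value through the resolver.

import base64

from crewai_files import ImageFile
from crewai_files.core.resolved import InlineBase64
from crewai_files.formatting.anthropic import AnthropicFormatter

file = ImageFile(source="photo.jpg")

# Assumed keyword constructor; the resolver normally produces this object.
resolved = InlineBase64(
    content_type="image/jpeg",
    data=base64.b64encode(file.read()).decode("ascii"),
)

block = AnthropicFormatter().format_block(file, resolved)
# Expected shape:
# {"type": "image",
#  "source": {"type": "base64", "media_type": "image/jpeg", "data": "..."}}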

View File

@@ -0,0 +1,277 @@
"""High-level API for formatting multimodal content."""
from __future__ import annotations
import os
from typing import Any
from crewai_files.cache.upload_cache import get_upload_cache
from crewai_files.core.types import FileInput
from crewai_files.formatting.anthropic import AnthropicFormatter
from crewai_files.formatting.bedrock import BedrockFormatter
from crewai_files.formatting.gemini import GeminiFormatter
from crewai_files.formatting.openai import OpenAIFormatter
from crewai_files.processing.constraints import get_constraints_for_provider
from crewai_files.processing.processor import FileProcessor
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
from crewai_files.uploaders.factory import ProviderType
def _normalize_provider(provider: str | None) -> ProviderType:
"""Normalize provider string to ProviderType.
Args:
provider: Raw provider string.
Returns:
Normalized provider type.
Raises:
ValueError: If provider is None or empty.
"""
if not provider:
raise ValueError("provider is required")
provider_lower = provider.lower()
if "gemini" in provider_lower:
return "gemini"
if "google" in provider_lower:
return "google"
if "anthropic" in provider_lower:
return "anthropic"
if "claude" in provider_lower:
return "claude"
if "bedrock" in provider_lower:
return "bedrock"
if "aws" in provider_lower:
return "aws"
if "azure" in provider_lower:
return "azure"
if "gpt" in provider_lower:
return "gpt"
return "openai"
def format_multimodal_content(
files: dict[str, FileInput],
provider: str | None = None,
) -> list[dict[str, Any]]:
"""Format files as provider-specific multimodal content blocks.
This is the main high-level API for converting files to content blocks
suitable for sending to LLM providers. It handles:
- File processing according to provider constraints
- Resolution (upload vs inline) based on provider capabilities
- Formatting into provider-specific content block structures
Args:
files: Dictionary mapping file names to FileInput objects.
provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
Returns:
List of content blocks in the provider's expected format.
Example:
>>> from crewai_files import format_multimodal_content, ImageFile
>>> files = {"photo": ImageFile(source="image.jpg")}
>>> blocks = format_multimodal_content(files, "openai")
"""
if not files:
return []
provider_type = _normalize_provider(provider)
processor = FileProcessor(constraints=provider_type)
processed_files = processor.process_files(files)
if not processed_files:
return []
constraints = get_constraints_for_provider(provider_type)
supported_types = _get_supported_types(constraints)
supported_files = _filter_supported_files(processed_files, supported_types)
if not supported_files:
return []
config = _get_resolver_config(provider_type)
upload_cache = get_upload_cache()
resolver = FileResolver(config=config, upload_cache=upload_cache)
formatter = _get_formatter(provider_type)
content_blocks: list[dict[str, Any]] = []
for name, file_input in supported_files.items():
resolved = resolver.resolve(file_input, provider_type)
block = _format_block(formatter, file_input, resolved, name)
if block is not None:
content_blocks.append(block)
return content_blocks
async def aformat_multimodal_content(
files: dict[str, FileInput],
provider: str | None = None,
) -> list[dict[str, Any]]:
"""Async format files as provider-specific multimodal content blocks.
Async version of format_multimodal_content with parallel file resolution.
Args:
files: Dictionary mapping file names to FileInput objects.
provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
Returns:
List of content blocks in the provider's expected format.
"""
if not files:
return []
provider_type = _normalize_provider(provider)
processor = FileProcessor(constraints=provider_type)
processed_files = await processor.aprocess_files(files)
if not processed_files:
return []
constraints = get_constraints_for_provider(provider_type)
supported_types = _get_supported_types(constraints)
supported_files = _filter_supported_files(processed_files, supported_types)
if not supported_files:
return []
config = _get_resolver_config(provider_type)
upload_cache = get_upload_cache()
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, provider_type)
formatter = _get_formatter(provider_type)
content_blocks: list[dict[str, Any]] = []
for name, resolved in resolved_files.items():
file_input = supported_files[name]
block = _format_block(formatter, file_input, resolved, name)
if block is not None:
content_blocks.append(block)
return content_blocks
def _get_supported_types(
constraints: Any | None,
) -> list[str]:
"""Get list of supported MIME type prefixes from constraints.
Args:
constraints: Provider constraints.
Returns:
List of MIME type prefixes (e.g., ["image/", "application/pdf"]).
"""
if constraints is None:
return []
supported: list[str] = []
if constraints.image is not None:
supported.append("image/")
if constraints.pdf is not None:
supported.append("application/pdf")
if constraints.audio is not None:
supported.append("audio/")
if constraints.video is not None:
supported.append("video/")
return supported
def _filter_supported_files(
files: dict[str, FileInput],
supported_types: list[str],
) -> dict[str, FileInput]:
"""Filter files to those with supported content types.
Args:
files: All files.
supported_types: MIME type prefixes to allow.
Returns:
Filtered dictionary of supported files.
"""
return {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
def _get_resolver_config(provider_lower: str) -> FileResolverConfig:
"""Get resolver config for provider.
Args:
provider_lower: Lowercase provider name.
Returns:
Configured FileResolverConfig.
"""
if "bedrock" in provider_lower:
s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
prefer_upload = bool(s3_bucket)
return FileResolverConfig(
prefer_upload=prefer_upload, use_bytes_for_bedrock=True
)
return FileResolverConfig(prefer_upload=False)
def _get_formatter(
provider_lower: str,
) -> OpenAIFormatter | AnthropicFormatter | BedrockFormatter | GeminiFormatter:
"""Get formatter for provider.
Args:
provider_lower: Lowercase provider name.
Returns:
Provider-specific formatter instance.
"""
if "anthropic" in provider_lower or "claude" in provider_lower:
return AnthropicFormatter()
if "bedrock" in provider_lower or "aws" in provider_lower:
s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
return BedrockFormatter(s3_bucket_owner=s3_bucket_owner)
if "gemini" in provider_lower or "google" in provider_lower:
return GeminiFormatter()
return OpenAIFormatter()
def _format_block(
formatter: OpenAIFormatter
| AnthropicFormatter
| BedrockFormatter
| GeminiFormatter,
file_input: FileInput,
resolved: Any,
name: str,
) -> dict[str, Any] | None:
"""Format a single file block using the appropriate formatter.
Args:
formatter: Provider formatter.
file_input: Original file input.
resolved: Resolved file.
name: File name.
Returns:
Content block dict or None.
"""
if isinstance(formatter, BedrockFormatter):
return formatter.format_block(file_input, resolved, name=name)
return formatter.format_block(file_input, resolved)
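
The new API reduces multimodal handling to a single call per provider. A minimal usage sketch (illustrative, not from the commit; assumes a local image.jpg and, for the async variant, an available event loop):

import asyncio

from crewai_files import (
    ImageFile,
    aformat_multimodal_content,
    format_multimodal_content,
)

files = {"photo": ImageFile(source="image.jpg")}

# Synchronous path: process, resolve, and format in one call.
blocks = format_multimodal_content(files, provider="anthropic")
# e.g. [{"type": "image", "source": {"type": "base64", ...}}]

# Async path: same signature, with parallel file resolution.
blocks = asyncio.run(aformat_multimodal_content(files, provider="gemini"))
# e.g. [{"inlineData": {"mimeType": "image/jpeg", "data": "..."}}]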

View File

@@ -0,0 +1,28 @@
"""Base formatter protocol for provider-specific content blocks."""
from __future__ import annotations
from typing import Any, Protocol
from crewai_files.core.resolved import ResolvedFile
from crewai_files.core.types import FileInput
class ContentFormatter(Protocol):
"""Protocol for formatting resolved files into provider content blocks."""
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
) -> dict[str, Any] | None:
"""Format a resolved file into a provider-specific content block.
Args:
file: Original file input with metadata.
resolved: Resolved file (FileReference, InlineBase64, etc.).
Returns:
Content block dict or None if file type not supported.
"""
...
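
Because ContentFormatter is a typing.Protocol, a formatter only needs a matching format_block signature; no inheritance is required. A hedged sketch of a hypothetical formatter (the block shape below is made up for illustration):

from typing import Any

from crewai_files.core.resolved import InlineBase64, ResolvedFile
from crewai_files.core.types import FileInput


class ExampleFormatter:
    """Hypothetical formatter that only handles inline base64 images."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        if not file.content_type.startswith("image/"):
            return None
        if isinstance(resolved, InlineBase64):
            # Illustrative block shape for a made-up provider.
            return {"kind": "image", "b64": resolved.data}
        return None

# Any object with this format_block signature satisfies ContentFormatter
# structurally, so it can stand in wherever the protocol is expected.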

View File

@@ -0,0 +1,188 @@
"""Bedrock content block formatter."""
from __future__ import annotations
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBytes,
ResolvedFile,
)
from crewai_files.core.types import FileInput
_DOCUMENT_FORMATS: dict[str, str] = {
"application/pdf": "pdf",
"text/csv": "csv",
"text/plain": "txt",
"text/markdown": "md",
"text/html": "html",
"application/msword": "doc",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
"application/vnd.ms-excel": "xls",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
}
_VIDEO_FORMATS: dict[str, str] = {
"video/mp4": "mp4",
"video/quicktime": "mov",
"video/x-matroska": "mkv",
"video/webm": "webm",
"video/x-flv": "flv",
"video/mpeg": "mpeg",
"video/3gpp": "three_gp",
}
class BedrockFormatter:
"""Formats resolved files into Bedrock Converse API content blocks."""
def __init__(self, s3_bucket_owner: str | None = None) -> None:
"""Initialize formatter.
Args:
s3_bucket_owner: Optional S3 bucket owner for file references.
"""
self.s3_bucket_owner = s3_bucket_owner
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
name: str | None = None,
) -> dict[str, Any] | None:
"""Format a resolved file into a Bedrock content block.
Args:
file: Original file input with metadata.
resolved: Resolved file.
name: File name (required for document blocks).
Returns:
Content block dict or None if not supported.
"""
content_type = file.content_type
if isinstance(resolved, FileReference) and resolved.file_uri:
return self._format_s3_block(content_type, resolved.file_uri, name)
if isinstance(resolved, InlineBytes):
file_bytes = resolved.data
else:
file_bytes = file.read()
return self._format_bytes_block(content_type, file_bytes, name)
def _format_s3_block(
self,
content_type: str,
file_uri: str,
name: str | None,
) -> dict[str, Any] | None:
"""Format block with S3 location source.
Args:
content_type: MIME type.
file_uri: S3 URI.
name: File name for documents.
Returns:
Content block dict or None.
"""
s3_location: dict[str, Any] = {"uri": file_uri}
if self.s3_bucket_owner:
s3_location["bucketOwner"] = self.s3_bucket_owner
if content_type.startswith("image/"):
return {
"image": {
"format": self._get_image_format(content_type),
"source": {"s3Location": s3_location},
}
}
if content_type.startswith("video/"):
video_format = _VIDEO_FORMATS.get(content_type)
if video_format:
return {
"video": {
"format": video_format,
"source": {"s3Location": s3_location},
}
}
return None
doc_format = _DOCUMENT_FORMATS.get(content_type)
if doc_format:
return {
"document": {
"name": name or "document",
"format": doc_format,
"source": {"s3Location": s3_location},
}
}
return None
def _format_bytes_block(
self,
content_type: str,
file_bytes: bytes,
name: str | None,
) -> dict[str, Any] | None:
"""Format block with inline bytes source.
Args:
content_type: MIME type.
file_bytes: Raw file bytes.
name: File name for documents.
Returns:
Content block dict or None.
"""
if content_type.startswith("image/"):
return {
"image": {
"format": self._get_image_format(content_type),
"source": {"bytes": file_bytes},
}
}
if content_type.startswith("video/"):
video_format = _VIDEO_FORMATS.get(content_type)
if video_format:
return {
"video": {
"format": video_format,
"source": {"bytes": file_bytes},
}
}
return None
doc_format = _DOCUMENT_FORMATS.get(content_type)
if doc_format:
return {
"document": {
"name": name or "document",
"format": doc_format,
"source": {"bytes": file_bytes},
}
}
return None
@staticmethod
def _get_image_format(content_type: str) -> str:
"""Get Bedrock image format from content type.
Args:
content_type: MIME type.
Returns:
Format string for Bedrock.
"""
media_type = content_type.split("/")[-1]
if media_type == "jpg":
return "jpeg"
return media_type
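
End to end, the Bedrock path runs through format_multimodal_content, which switches between bytes and s3Location sources based on the CREWAI_BEDROCK_S3_BUCKET environment variable. A minimal sketch (illustrative; assumes a local photo.png):

from crewai_files import ImageFile, format_multimodal_content

blocks = format_multimodal_content(
    {"photo": ImageFile(source="photo.png")},
    provider="bedrock",
)
# Without CREWAI_BEDROCK_S3_BUCKET set, the image resolves to raw bytes:
#   [{"image": {"format": "png", "source": {"bytes": b"..."}}}]
# With the bucket set (and optionally CREWAI_BEDROCK_S3_BUCKET_OWNER), uploads
# are preferred and the block carries an s3Location source instead.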

View File

@@ -0,0 +1,66 @@
"""Gemini content block formatter."""
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
UrlReference,
)
from crewai_files.core.types import FileInput
class GeminiFormatter:
"""Formats resolved files into Gemini content blocks."""
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
) -> dict[str, Any] | None:
"""Format a resolved file into a Gemini content block.
Args:
file: Original file input with metadata.
resolved: Resolved file.
Returns:
Content block dict or None if not supported.
"""
content_type = file.content_type
if isinstance(resolved, FileReference) and resolved.file_uri:
return {
"fileData": {
"mimeType": resolved.content_type,
"fileUri": resolved.file_uri,
}
}
if isinstance(resolved, UrlReference):
return {
"fileData": {
"mimeType": content_type,
"fileUri": resolved.url,
}
}
if isinstance(resolved, InlineBase64):
return {
"inlineData": {
"mimeType": resolved.content_type,
"data": resolved.data,
}
}
data = base64.b64encode(file.read()).decode("ascii")
return {
"inlineData": {
"mimeType": content_type,
"data": data,
}
}
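
For Gemini, the choice between fileData and inlineData follows from how the resolver delivers the file: inline base64 for files kept local, a file URI for files uploaded via the File API. A short sketch (illustrative values):

from crewai_files import ImageFile, format_multimodal_content

blocks = format_multimodal_content(
    {"diagram": ImageFile(source="diagram.png")},
    provider="gemini",
)
# Inline resolution yields:
#   {"inlineData": {"mimeType": "image/png", "data": "<base64>"}}
# An uploaded file comes back as a reference:
#   {"fileData": {"mimeType": "image/png", "fileUri": "https://..."}}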

View File

@@ -0,0 +1,60 @@
"""OpenAI content block formatter."""
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
UrlReference,
)
from crewai_files.core.types import FileInput
class OpenAIFormatter:
"""Formats resolved files into OpenAI content blocks."""
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
) -> dict[str, Any] | None:
"""Format a resolved file into an OpenAI content block.
Args:
file: Original file input with metadata.
resolved: Resolved file.
Returns:
Content block dict or None if not supported.
"""
content_type = file.content_type
if isinstance(resolved, FileReference):
return {
"type": "file",
"file": {"file_id": resolved.file_id},
}
if isinstance(resolved, UrlReference):
return {
"type": "image_url",
"image_url": {"url": resolved.url},
}
if isinstance(resolved, InlineBase64):
return {
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
data = base64.b64encode(file.read()).decode("ascii")
return {
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
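
Provider strings do not need to match exactly: _normalize_provider maps model-style strings, and _get_formatter falls back to OpenAIFormatter for any OpenAI-compatible provider. A brief sketch (illustrative; assumes a local photo.jpg):

from crewai_files import ImageFile, format_multimodal_content

blocks = format_multimodal_content(
    {"photo": ImageFile(source="photo.jpg")},
    provider="openai",
)
# Inline resolution yields a data URL block:
#   [{"type": "image_url",
#     "image_url": {"url": "data:image/jpeg;base64,..."}}]
# Strings such as "gpt-4o" or "azure/gpt-4o-mini" also route to this
# formatter, since unrecognized providers default to the OpenAI format.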

View File

@@ -10,7 +10,7 @@ from collections.abc import Callable
import logging
from typing import TYPE_CHECKING, Any, Literal, cast
from crewai_files import FileProcessor
from crewai_files import aformat_multimodal_content, format_multimodal_content
from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
from pydantic_core import CoreSchema, core_schema
@@ -220,9 +220,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
"""Inject files as multimodal content into messages.
For crews with input files and LLMs that support multimodal,
processes files according to provider constraints and file handling mode,
then delegates to the LLM's format_multimodal_content method to
generate provider-specific content blocks.
uses crewai_files to process, resolve, and format files into
provider-specific content blocks.
"""
if not self.crew or not self.task:
return
@@ -235,15 +234,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
return
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
processor = FileProcessor(constraints=provider)
files = processor.process_files(files)
content_blocks = format_multimodal_content(files, provider)
from crewai_files import get_upload_cache
upload_cache = get_upload_cache()
content_blocks = self.llm.format_multimodal_content(
files, upload_cache=upload_cache
)
if not content_blocks:
return
@@ -262,9 +254,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
"""Async inject files as multimodal content into messages.
For crews with input files and LLMs that support multimodal,
processes files according to provider constraints using parallel processing,
then delegates to the LLM's aformat_multimodal_content method to
generate provider-specific content blocks with parallel file resolution.
uses crewai_files to process, resolve, and format files into
provider-specific content blocks with parallel file resolution.
"""
if not self.crew or not self.task:
return
@@ -277,15 +268,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
return
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
processor = FileProcessor(constraints=provider)
files = await processor.aprocess_files(files)
content_blocks = await aformat_multimodal_content(files, provider)
from crewai_files import get_upload_cache
upload_cache = get_upload_cache()
content_blocks = await self.llm.aformat_multimodal_content(
files, upload_cache=upload_cache
)
if not content_blocks:
return
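
The flow the updated executor follows can be reproduced outside crewai in a few lines: derive a provider string from the LLM instance, then hand the files to crewai_files. A sketch (the _FakeLLM class and its model string are placeholders, not crewai APIs):

from crewai_files import ImageFile, format_multimodal_content


class _FakeLLM:
    """Placeholder standing in for any LLM object exposing provider/model."""

    model = "anthropic/claude-3-5-sonnet"


llm = _FakeLLM()
files = {"photo": ImageFile(source="photo.jpg")}

# Mirrors the executor: provider string first, then a single formatting call.
provider = getattr(llm, "provider", None) or getattr(llm, "model", "")
content_blocks = format_multimodal_content(files, provider)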

View File

@@ -53,7 +53,6 @@ from crewai.utilities.logger_utils import suppress_warnings
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from litellm.exceptions import ContextWindowExceededError
from litellm.litellm_core_utils.get_supported_openai_params import (
get_supported_openai_params,
@@ -2254,66 +2253,3 @@ class LLM(BaseLLM):
if "claude-3" in model_lower or "claude-4" in model_lower:
return ["image/", "application/pdf"]
return ["image/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as multimodal content blocks for litellm.
Uses OpenAI-compatible format which litellm translates to provider format.
Uses FileResolver for consistent base64 encoding.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache (not used by litellm but kept for interface consistency).
Returns:
List of content blocks in OpenAI's expected format.
"""
import base64
from crewai_files import (
FileResolver,
FileResolverConfig,
InlineBase64,
)
if not self.supports_multimodal():
return []
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
# LiteLLM uses OpenAI-compatible format
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "openai")
if isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
else:
# Fallback to direct base64 encoding
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
)
return content_blocks

View File

@@ -32,8 +32,6 @@ from crewai.types.usage_metrics import UsageMetrics
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.agent.core import Agent
from crewai.task import Task
from crewai.tools.base_tool import BaseTool
@@ -298,43 +296,6 @@ class BaseLLM(ABC):
"""
return []
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as multimodal content blocks for the LLM.
Subclasses should override this to provide provider-specific formatting.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in the provider's expected format.
"""
return []
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as multimodal content blocks for the LLM.
Default implementation calls the sync version. Subclasses should
override to use async file resolution for parallel processing.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in the provider's expected format.
"""
return self.format_multimodal_content(files, upload_cache)
def format_text_content(self, text: str) -> dict[str, Any]:
"""Format text as a content block for the LLM.

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
import json
import logging
import os
@@ -20,12 +19,8 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.llms.hooks.base import BaseInterceptor
DEFAULT_CACHE_TTL = "ephemeral"
try:
from anthropic import Anthropic, AsyncAnthropic
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
@@ -1256,222 +1251,3 @@ class AnthropicCompletion(BaseLLM):
if not self.supports_multimodal():
return []
return ["image/", "application/pdf"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
enable_caching: bool = True,
cache_ttl: str | None = None,
) -> list[dict[str, Any]]:
"""Format files as Anthropic multimodal content blocks.
Anthropic supports both base64 inline format and file references via Files API.
Uses FileResolver to determine the best delivery method based on file size.
Supports prompt caching to reduce costs and latency for repeated file usage.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
enable_caching: Whether to add cache_control markers (default: True).
cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models).
Returns:
List of content blocks in Anthropic's expected format.
"""
if not self.supports_multimodal():
return []
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
file_list = list(files.values())
num_files = len(file_list)
for i, file_input in enumerate(file_list):
content_type = file_input.content_type
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "anthropic")
block: dict[str, Any] = {}
if isinstance(resolved, FileReference):
if content_type.startswith("image/"):
block = {
"type": "image",
"source": {
"type": "file",
"file_id": resolved.file_id,
},
}
elif content_type == "application/pdf":
block = {
"type": "document",
"source": {
"type": "file",
"file_id": resolved.file_id,
},
}
elif isinstance(resolved, InlineBase64):
if content_type.startswith("image/"):
block = {
"type": "image",
"source": {
"type": "base64",
"media_type": resolved.content_type,
"data": resolved.data,
},
}
elif content_type == "application/pdf":
block = {
"type": "document",
"source": {
"type": "base64",
"media_type": resolved.content_type,
"data": resolved.data,
},
}
else:
data = base64.b64encode(file_input.read()).decode("ascii")
if content_type.startswith("image/"):
block = {
"type": "image",
"source": {
"type": "base64",
"media_type": content_type,
"data": data,
},
}
elif content_type == "application/pdf":
block = {
"type": "document",
"source": {
"type": "base64",
"media_type": content_type,
"data": data,
},
}
if block and enable_caching and i == num_files - 1:
cache_control: dict[str, str] = {"type": cache_ttl or DEFAULT_CACHE_TTL}
block["cache_control"] = cache_control
if block:
content_blocks.append(block)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
enable_caching: bool = True,
cache_ttl: str | None = None,
) -> list[dict[str, Any]]:
"""Async format files as Anthropic multimodal content blocks.
Uses parallel file resolution for improved performance with multiple files.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
enable_caching: Whether to add cache_control markers (default: True).
cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models).
Returns:
List of content blocks in Anthropic's expected format.
"""
if not self.supports_multimodal():
return []
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
supported_types = self.supported_multimodal_content_types()
supported_files = {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
if not supported_files:
return []
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, "anthropic")
content_blocks: list[dict[str, Any]] = []
num_files = len(resolved_files)
file_names = list(supported_files.keys())
for i, name in enumerate(file_names):
if name not in resolved_files:
continue
resolved = resolved_files[name]
file_input = supported_files[name]
content_type = file_input.content_type
block: dict[str, Any] = {}
if isinstance(resolved, FileReference):
if content_type.startswith("image/"):
block = {
"type": "image",
"source": {
"type": "file",
"file_id": resolved.file_id,
},
}
elif content_type == "application/pdf":
block = {
"type": "document",
"source": {
"type": "file",
"file_id": resolved.file_id,
},
}
elif isinstance(resolved, InlineBase64):
if content_type.startswith("image/"):
block = {
"type": "image",
"source": {
"type": "base64",
"media_type": resolved.content_type,
"data": resolved.data,
},
}
elif content_type == "application/pdf":
block = {
"type": "document",
"source": {
"type": "base64",
"media_type": resolved.content_type,
"data": resolved.data,
},
}
if block and enable_caching and i == num_files - 1:
cache_control: dict[str, str] = {"type": cache_ttl or DEFAULT_CACHE_TTL}
block["cache_control"] = cache_control
if block:
content_blocks.append(block)
return content_blocks

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
import json
import logging
import os
@@ -18,8 +17,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.llms.hooks.base import BaseInterceptor
@@ -1040,115 +1037,3 @@ class AzureCompletion(BaseLLM):
if not self.supports_multimodal():
return []
return ["image/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as Azure OpenAI multimodal content blocks.
Azure OpenAI uses the same image_url format as OpenAI.
Uses FileResolver for consistent base64 encoding.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache (not used by Azure but kept for interface consistency).
Returns:
List of content blocks in Azure OpenAI's expected format.
"""
if not self.supports_multimodal():
return []
from crewai_files import (
FileResolver,
FileResolverConfig,
InlineBase64,
)
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
# Azure doesn't support file uploads for images, so just use inline
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "azure")
if isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
else:
# Fallback to direct base64 encoding
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as Azure OpenAI multimodal content blocks.
Uses parallel file resolution for improved performance with multiple files.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache (not used by Azure but kept for interface consistency).
Returns:
List of content blocks in Azure OpenAI's expected format.
"""
if not self.supports_multimodal():
return []
from crewai_files import (
FileResolver,
FileResolverConfig,
InlineBase64,
)
supported_types = self.supported_multimodal_content_types()
supported_files = {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
if not supported_files:
return []
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, "azure")
return [
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
for resolved in resolved_files.values()
if isinstance(resolved, InlineBase64)
]

View File

@@ -20,7 +20,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from mypy_boto3_bedrock_runtime.type_defs import (
GuardrailConfigurationTypeDef,
GuardrailStreamConfigurationTypeDef,
@@ -1563,260 +1562,3 @@ class BedrockCompletion(BaseLLM):
"video/3gpp": "three_gp",
}
return format_map.get(content_type)
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as Bedrock Converse API multimodal content blocks.
Bedrock Converse API supports both raw bytes and S3 URI references.
S3 uploads are only supported by Amazon Nova models.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for S3 uploads.
Returns:
List of content blocks in Bedrock's expected format.
"""
if not self.supports_multimodal():
return []
import os
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBytes,
)
content_blocks: list[dict[str, Any]] = []
is_nova = self._is_nova_model()
s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
prefer_upload = bool(s3_bucket) and is_nova
config = FileResolverConfig(
prefer_upload=prefer_upload, use_bytes_for_bedrock=True
)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for name, file_input in files.items():
content_type = file_input.content_type
resolved = resolver.resolve(file_input, "bedrock")
if isinstance(resolved, FileReference) and resolved.file_uri:
s3_location: dict[str, Any] = {"uri": resolved.file_uri}
if s3_bucket_owner:
s3_location["bucketOwner"] = s3_bucket_owner
if content_type.startswith("image/"):
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"s3Location": s3_location},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"s3Location": s3_location},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"s3Location": s3_location},
}
}
)
else:
if isinstance(resolved, InlineBytes):
file_bytes = resolved.data
else:
file_bytes = file_input.read()
if content_type.startswith("image/"):
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"bytes": file_bytes},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"bytes": file_bytes},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"bytes": file_bytes},
}
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as Bedrock Converse API multimodal content blocks.
Uses parallel file resolution. S3 uploads are only supported by Nova models.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for S3 uploads.
Returns:
List of content blocks in Bedrock's expected format.
"""
if not self.supports_multimodal():
return []
import os
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBytes,
)
is_nova = self._is_nova_model()
s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
prefer_upload = bool(s3_bucket) and is_nova
config = FileResolverConfig(
prefer_upload=prefer_upload, use_bytes_for_bedrock=True
)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(files, "bedrock")
content_blocks: list[dict[str, Any]] = []
for name, resolved in resolved_files.items():
file_input = files[name]
content_type = file_input.content_type
if isinstance(resolved, FileReference) and resolved.file_uri:
s3_location: dict[str, Any] = {"uri": resolved.file_uri}
if s3_bucket_owner:
s3_location["bucketOwner"] = s3_bucket_owner
if content_type.startswith("image/"):
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"s3Location": s3_location},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"s3Location": s3_location},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"s3Location": s3_location},
}
}
)
else:
if isinstance(resolved, InlineBytes):
file_bytes = resolved.data
else:
file_bytes = await file_input.aread()
if content_type.startswith("image/"):
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"bytes": file_bytes},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"bytes": file_bytes},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"bytes": file_bytes},
}
}
)
return content_blocks

View File

@@ -19,11 +19,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import (
FileInput,
UploadCache,
)
from crewai.llms.hooks.base import BaseInterceptor
@@ -1097,138 +1092,6 @@ class GeminiCompletion(BaseLLM):
"""
return ["image/", "audio/", "video/", "application/pdf", "text/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as Gemini multimodal content blocks.
Gemini supports both inlineData format and file references via File API.
Uses FileResolver to determine the best delivery method based on file size.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in Gemini's expected format.
"""
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "gemini")
if isinstance(resolved, FileReference) and resolved.file_uri:
# Use file reference format for uploaded files
content_blocks.append(
{
"fileData": {
"mimeType": resolved.content_type,
"fileUri": resolved.file_uri,
}
}
)
elif isinstance(resolved, InlineBase64):
# Use inline format for smaller files
content_blocks.append(
{
"inlineData": {
"mimeType": resolved.content_type,
"data": resolved.data,
}
}
)
else:
# Fallback to base64 encoding
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"inlineData": {
"mimeType": content_type,
"data": data,
}
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as Gemini multimodal content blocks.
Uses parallel file resolution for improved performance with multiple files.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in Gemini's expected format.
"""
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
supported_types = self.supported_multimodal_content_types()
supported_files = {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
if not supported_files:
return []
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, "gemini")
content_blocks: list[dict[str, Any]] = []
for resolved in resolved_files.values():
if isinstance(resolved, FileReference) and resolved.file_uri:
content_blocks.append(
{
"fileData": {
"mimeType": resolved.content_type,
"fileUri": resolved.file_uri,
}
}
)
elif isinstance(resolved, InlineBase64):
content_blocks.append(
{
"inlineData": {
"mimeType": resolved.content_type,
"data": resolved.data,
}
}
)
return content_blocks
def format_text_content(self, text: str) -> dict[str, Any]:
"""Format text as a Gemini content block.

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
from collections.abc import AsyncIterator
import json
import logging
@@ -27,8 +26,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.agent.core import Agent
from crewai.llms.hooks.base import BaseInterceptor
from crewai.task import Task
@@ -1080,136 +1077,3 @@ class OpenAICompletion(BaseLLM):
if not self.supports_multimodal():
return []
return ["image/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as OpenAI multimodal content blocks.
OpenAI supports both base64 data URLs and file_id references via Files API.
Uses FileResolver to determine the best delivery method based on file size.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in OpenAI's expected format.
"""
if not self.supports_multimodal():
return []
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "openai")
if isinstance(resolved, FileReference):
content_blocks.append(
{
"type": "file",
"file": {
"file_id": resolved.file_id,
},
}
)
elif isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
else:
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as OpenAI multimodal content blocks.
Uses parallel file resolution for improved performance with multiple files.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in OpenAI's expected format.
"""
if not self.supports_multimodal():
return []
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
supported_types = self.supported_multimodal_content_types()
supported_files = {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
if not supported_files:
return []
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, "openai")
content_blocks: list[dict[str, Any]] = []
for resolved in resolved_files.values():
if isinstance(resolved, FileReference):
content_blocks.append(
{
"type": "file",
"file": {
"file_id": resolved.file_id,
},
}
)
elif isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
return content_blocks