refactor: centralize multimodal formatting in crewai_files

2026-01-28 17:48:13 +00:00 · 2026-01-22 15:59:55 -05:00
parent b95a3a9bc8
commit ca07114bcf
18 changed files with 742 additions and 1002 deletions
--- a/lib/crewai-files/pyproject.toml
+++ b/lib/crewai-files/pyproject.toml
@@ -1,10 +1,10 @@
 [project]
 name = "crewai-files"
-version = "0.1.0"
+dynamic = ["version"]
 description = "Add your description here"
 readme = "README.md"
 authors = [
-    { name = "Greyson LaLonde", email = "greyson.r.lalonde@gmail.com" }
+    { name = "Greyson LaLonde", email = "greyson@crewai.com" }
 ]
 requires-python = ">=3.10, <3.14"
 dependencies = [
@@ -17,9 +17,9 @@ dependencies = [
    "av~=13.0.0",
 ]
 [project.scripts]
 crewai-files = "crewai_files:main"
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 [tool.hatch.version]
 path = "src/crewai_files/__init__.py"
--- a/lib/crewai-files/src/crewai_files/__init__.py
+++ b/lib/crewai-files/src/crewai_files/__init__.py
@@ -49,6 +49,10 @@ from crewai_files.core.types import (
    VideoFile,
    VideoMimeType,
 )
 from crewai_files.formatting import (
    aformat_multimodal_content,
    format_multimodal_content,
 )
 from crewai_files.processing import (
    ANTHROPIC_CONSTRAINTS,
    BEDROCK_CONSTRAINTS,
@@ -132,10 +136,12 @@ __all__ = [
    "VideoExtension",
    "VideoFile",
    "VideoMimeType",
    "aformat_multimodal_content",
    "cleanup_expired_files",
    "cleanup_provider_files",
    "cleanup_uploaded_files",
    "create_resolver",
    "format_multimodal_content",
    "get_constraints_for_provider",
    "get_upload_cache",
    "get_uploader",
@@ -143,3 +149,5 @@ __all__ = [
    "reset_upload_cache",
    "wrap_file_source",
 ]
 __version__ = "1.8.1"
--- a/lib/crewai-files/src/crewai_files/formatting/__init__.py
+++ b/lib/crewai-files/src/crewai_files/formatting/__init__.py
@@ -0,0 +1,12 @@
 """High-level formatting API for multimodal content."""
 from crewai_files.formatting.api import (
    aformat_multimodal_content,
    format_multimodal_content,
 )
 __all__ = [
    "aformat_multimodal_content",
    "format_multimodal_content",
 ]
--- a/lib/crewai-files/src/crewai_files/formatting/anthropic.py
+++ b/lib/crewai-files/src/crewai_files/formatting/anthropic.py
@@ -0,0 +1,91 @@
 """Anthropic content block formatter."""
 from __future__ import annotations
 import base64
 from typing import Any
 from crewai_files.core.resolved import (
    FileReference,
    InlineBase64,
    ResolvedFile,
    UrlReference,
 )
 from crewai_files.core.types import FileInput
 class AnthropicFormatter:
    """Builds Anthropic content blocks from resolved files."""
    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into an Anthropic content block.
        Args:
            file: Original file input; supplies the MIME type and, on the
                fallback path, the raw bytes.
            resolved: Resolution result for ``file``.
        Returns:
            Content block dict, or None when the MIME type maps to no
            Anthropic block type.
        """
        mime_type = file.content_type
        kind = self._get_block_type(mime_type)
        if kind is None:
            return None
        source: dict[str, Any]
        if isinstance(resolved, FileReference):
            source = {"type": "file", "file_id": resolved.file_id}
        elif isinstance(resolved, UrlReference):
            source = {"type": "url", "url": resolved.url}
        elif isinstance(resolved, InlineBase64):
            source = {
                "type": "base64",
                "media_type": resolved.content_type,
                "data": resolved.data,
            }
        else:
            # Any other resolution is encoded inline from the file's own bytes.
            encoded = base64.b64encode(file.read()).decode("ascii")
            source = {
                "type": "base64",
                "media_type": mime_type,
                "data": encoded,
            }
        return {"type": kind, "source": source}
    @staticmethod
    def _get_block_type(content_type: str) -> str | None:
        """Map a MIME type to an Anthropic block type.
        Args:
            content_type: MIME type.
        Returns:
            "document" for PDFs, "image" for any ``image/`` type, otherwise None.
        """
        if content_type == "application/pdf":
            return "document"
        return "image" if content_type.startswith("image/") else None
--- a/lib/crewai-files/src/crewai_files/formatting/api.py
+++ b/lib/crewai-files/src/crewai_files/formatting/api.py
@@ -0,0 +1,277 @@
 """High-level API for formatting multimodal content."""
 from __future__ import annotations
 import os
 from typing import Any
 from crewai_files.cache.upload_cache import get_upload_cache
 from crewai_files.core.types import FileInput
 from crewai_files.formatting.anthropic import AnthropicFormatter
 from crewai_files.formatting.bedrock import BedrockFormatter
 from crewai_files.formatting.gemini import GeminiFormatter
 from crewai_files.formatting.openai import OpenAIFormatter
 from crewai_files.processing.constraints import get_constraints_for_provider
 from crewai_files.processing.processor import FileProcessor
 from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
 from crewai_files.uploaders.factory import ProviderType
 def _normalize_provider(provider: str | None) -> ProviderType:
    """Normalize a raw provider or model string to a ProviderType.
    Matching is by case-insensitive substring, so a full model identifier
    works as well as a bare provider name. "bedrock" and "aws" are tested
    before the model-vendor keywords because a Bedrock model identifier can
    embed the vendor name (e.g. "bedrock/anthropic.claude-3-sonnet") and
    would otherwise be classified as "anthropic".
    Args:
        provider: Raw provider string.
    Returns:
        Normalized provider type; "openai" when no keyword matches.
    Raises:
        ValueError: If provider is None or empty.
    """
    if not provider:
        raise ValueError("provider is required")
    provider_lower = provider.lower()
    # Order matters: the first keyword found in the string wins.
    keywords: tuple[ProviderType, ...] = (
        "bedrock",
        "aws",
        "gemini",
        "google",
        "anthropic",
        "claude",
        "azure",
        "gpt",
    )
    for keyword in keywords:
        if keyword in provider_lower:
            return keyword
    return "openai"
 def format_multimodal_content(
    files: dict[str, FileInput],
    provider: str | None = None,
 ) -> list[dict[str, Any]]:
    """Convert files into content blocks for the given LLM provider.
    The stages run in order: process the files with a FileProcessor
    configured for the provider, drop files whose MIME type the provider's
    constraints do not cover, resolve each remaining file, and pass the
    result to the provider's formatter. An empty list is returned as soon as
    a stage leaves nothing to work on.
    Args:
        files: Dictionary mapping file names to FileInput objects.
        provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
    Returns:
        List of content blocks in the provider's expected format.
    Raises:
        ValueError: If files is non-empty and provider is None or empty.
    Example:
        >>> from crewai_files import format_multimodal_content, ImageFile
        >>> files = {"photo": ImageFile(source="image.jpg")}
        >>> blocks = format_multimodal_content(files, "openai")
    """
    if not files:
        return []
    provider_type = _normalize_provider(provider)
    processed_files = FileProcessor(constraints=provider_type).process_files(files)
    if not processed_files:
        return []
    supported_files = _filter_supported_files(
        processed_files,
        _get_supported_types(get_constraints_for_provider(provider_type)),
    )
    if not supported_files:
        return []
    resolver = FileResolver(
        config=_get_resolver_config(provider_type),
        upload_cache=get_upload_cache(),
    )
    formatter = _get_formatter(provider_type)
    # Lazy generator: each file is resolved and formatted one after another.
    rendered = (
        _format_block(
            formatter,
            file_input,
            resolver.resolve(file_input, provider_type),
            name,
        )
        for name, file_input in supported_files.items()
    )
    return [entry for entry in rendered if entry is not None]
 async def aformat_multimodal_content(
    files: dict[str, FileInput],
    provider: str | None = None,
 ) -> list[dict[str, Any]]:
    """Async counterpart of format_multimodal_content.
    Files are processed with ``FileProcessor.aprocess_files`` and resolved
    together through ``FileResolver.aresolve_files``; filtering and
    formatting are the same as in the sync function.
    Args:
        files: Dictionary mapping file names to FileInput objects.
        provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
    Returns:
        List of content blocks in the provider's expected format.
    Raises:
        ValueError: If files is non-empty and provider is None or empty.
    """
    if not files:
        return []
    provider_type = _normalize_provider(provider)
    processed_files = await FileProcessor(constraints=provider_type).aprocess_files(
        files
    )
    if not processed_files:
        return []
    supported_files = _filter_supported_files(
        processed_files,
        _get_supported_types(get_constraints_for_provider(provider_type)),
    )
    if not supported_files:
        return []
    resolver = FileResolver(
        config=_get_resolver_config(provider_type),
        upload_cache=get_upload_cache(),
    )
    resolved_by_name = await resolver.aresolve_files(supported_files, provider_type)
    formatter = _get_formatter(provider_type)
    rendered = (
        _format_block(formatter, supported_files[name], resolved, name)
        for name, resolved in resolved_by_name.items()
    )
    return [entry for entry in rendered if entry is not None]
 def _get_supported_types(
    constraints: Any | None,
 ) -> list[str]:
    """List the MIME type prefixes a provider's constraints cover.
    A media kind counts as covered when the matching attribute on
    ``constraints`` is not None.
    Args:
        constraints: Provider constraints, or None.
    Returns:
        MIME type prefixes in image, pdf, audio, video order
        (e.g., ["image/", "application/pdf"]); empty when constraints is None.
    """
    if constraints is None:
        return []
    prefix_by_attr = (
        ("image", "image/"),
        ("pdf", "application/pdf"),
        ("audio", "audio/"),
        ("video", "video/"),
    )
    return [
        prefix
        for attr, prefix in prefix_by_attr
        if getattr(constraints, attr) is not None
    ]
 def _filter_supported_files(
    files: dict[str, FileInput],
    supported_types: list[str],
 ) -> dict[str, FileInput]:
    """Keep only the files whose content type starts with an allowed prefix.
    Args:
        files: All files, keyed by name.
        supported_types: MIME type prefixes to allow.
    Returns:
        New dictionary holding the accepted files in their original order.
    """
    kept: dict[str, FileInput] = {}
    for name, candidate in files.items():
        if any(candidate.content_type.startswith(prefix) for prefix in supported_types):
            kept[name] = candidate
    return kept
 def _get_resolver_config(provider_lower: str) -> FileResolverConfig:
    """Get resolver config for provider.
    Bedrock is recognised by "bedrock" or "aws", the same keywords
    _get_formatter uses to select BedrockFormatter, so a provider that is
    formatted as Bedrock is also resolved with the Bedrock settings.
    Args:
        provider_lower: Lowercase provider name.
    Returns:
        Configured FileResolverConfig. For Bedrock, uploading is preferred
        only when the CREWAI_BEDROCK_S3_BUCKET environment variable is set
        to a non-empty value.
    """
    if "bedrock" in provider_lower or "aws" in provider_lower:
        s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
        return FileResolverConfig(
            prefer_upload=bool(s3_bucket), use_bytes_for_bedrock=True
        )
    return FileResolverConfig(prefer_upload=False)
 def _get_formatter(
    provider_lower: str,
 ) -> OpenAIFormatter | AnthropicFormatter | BedrockFormatter | GeminiFormatter:
    """Pick the formatter that matches a provider name.
    Keywords are tested in a fixed order (Anthropic, Bedrock, Gemini) and
    OpenAIFormatter is the fallback when none of them appears.
    Args:
        provider_lower: Lowercase provider name.
    Returns:
        Provider-specific formatter instance.
    """
    def mentions(*keywords: str) -> bool:
        return any(keyword in provider_lower for keyword in keywords)
    if mentions("anthropic", "claude"):
        return AnthropicFormatter()
    if mentions("bedrock", "aws"):
        # The bucket owner is optional and read from the environment.
        return BedrockFormatter(
            s3_bucket_owner=os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
        )
    if mentions("gemini", "google"):
        return GeminiFormatter()
    return OpenAIFormatter()
 def _format_block(
    formatter: OpenAIFormatter
    | AnthropicFormatter
    | BedrockFormatter
    | GeminiFormatter,
    file_input: FileInput,
    resolved: Any,
    name: str,
 ) -> dict[str, Any] | None:
    """Run one file through the formatter.
    Only BedrockFormatter receives the file name; the other formatters take
    just the file and its resolution.
    Args:
        formatter: Provider formatter.
        file_input: Original file input.
        resolved: Resolved file.
        name: File name.
    Returns:
        Content block dict or None.
    """
    if not isinstance(formatter, BedrockFormatter):
        return formatter.format_block(file_input, resolved)
    return formatter.format_block(file_input, resolved, name=name)
--- a/lib/crewai-files/src/crewai_files/formatting/base.py
+++ b/lib/crewai-files/src/crewai_files/formatting/base.py
@@ -0,0 +1,28 @@
 """Base formatter protocol for provider-specific content blocks."""
 from __future__ import annotations
 from typing import Any, Protocol
 from crewai_files.core.resolved import ResolvedFile
 from crewai_files.core.types import FileInput
 class ContentFormatter(Protocol):
    """Structural interface for provider content-block formatters."""
    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Produce the provider-specific content block for one file.
        Args:
            file: Original file input with metadata.
            resolved: Resolved file (FileReference, InlineBase64, etc.).
        Returns:
            Content block dict, or None if the file type is not supported.
        """
        ...
--- a/lib/crewai-files/src/crewai_files/formatting/bedrock.py
+++ b/lib/crewai-files/src/crewai_files/formatting/bedrock.py
@@ -0,0 +1,188 @@
 """Bedrock content block formatter."""
 from __future__ import annotations
 from typing import Any
 from crewai_files.core.resolved import (
    FileReference,
    InlineBytes,
    ResolvedFile,
 )
 from crewai_files.core.types import FileInput
 # MIME type -> value placed in the "format" field of a Bedrock "document"
 # block. A content type missing from this table yields no document block.
 _DOCUMENT_FORMATS: dict[str, str] = {
    "application/pdf": "pdf",
    "text/csv": "csv",
    "text/plain": "txt",
    "text/markdown": "md",
    "text/html": "html",
    "application/msword": "doc",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    "application/vnd.ms-excel": "xls",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
 }
 # MIME type -> value placed in the "format" field of a Bedrock "video"
 # block. A video content type missing from this table yields no block.
 _VIDEO_FORMATS: dict[str, str] = {
    "video/mp4": "mp4",
    "video/quicktime": "mov",
    "video/x-matroska": "mkv",
    "video/webm": "webm",
    "video/x-flv": "flv",
    "video/mpeg": "mpeg",
    "video/3gpp": "three_gp",
 }
 class BedrockFormatter:
    """Builds Bedrock Converse API content blocks from resolved files."""
    def __init__(self, s3_bucket_owner: str | None = None) -> None:
        """Store the optional bucket owner.
        Args:
            s3_bucket_owner: Optional S3 bucket owner added to S3 locations.
        """
        self.s3_bucket_owner = s3_bucket_owner
    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
        name: str | None = None,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into a Bedrock content block.
        A FileReference carrying a file URI becomes an S3-location block.
        Everything else is sent as raw bytes, taken from an InlineBytes
        resolution when available and read from the file otherwise.
        Args:
            file: Original file input with metadata.
            resolved: Resolved file.
            name: File name, used for document blocks.
        Returns:
            Content block dict or None if not supported.
        """
        content_type = file.content_type
        if isinstance(resolved, FileReference) and resolved.file_uri:
            return self._format_s3_block(content_type, resolved.file_uri, name)
        payload = resolved.data if isinstance(resolved, InlineBytes) else file.read()
        return self._format_bytes_block(content_type, payload, name)
    def _format_s3_block(
        self,
        content_type: str,
        file_uri: str,
        name: str | None,
    ) -> dict[str, Any] | None:
        """Build a block whose source is an S3 location.
        Args:
            content_type: MIME type.
            file_uri: S3 URI.
            name: File name for documents.
        Returns:
            Content block dict or None.
        """
        location: dict[str, Any] = {"uri": file_uri}
        if self.s3_bucket_owner:
            location["bucketOwner"] = self.s3_bucket_owner
        return self._wrap_source(content_type, {"s3Location": location}, name)
    def _format_bytes_block(
        self,
        content_type: str,
        file_bytes: bytes,
        name: str | None,
    ) -> dict[str, Any] | None:
        """Build a block whose source is inline bytes.
        Args:
            content_type: MIME type.
            file_bytes: Raw file bytes.
            name: File name for documents.
        Returns:
            Content block dict or None.
        """
        return self._wrap_source(content_type, {"bytes": file_bytes}, name)
    def _wrap_source(
        self,
        content_type: str,
        source: dict[str, Any],
        name: str | None,
    ) -> dict[str, Any] | None:
        """Wrap a source dict in an image, video, or document block.
        Args:
            content_type: MIME type that selects the block kind and format.
            source: Ready-made value for the block's "source" field.
            name: File name for documents; "document" is used when falsy.
        Returns:
            Content block dict, or None when the content type has no format.
        """
        if content_type.startswith("image/"):
            return {
                "image": {
                    "format": self._get_image_format(content_type),
                    "source": source,
                }
            }
        if content_type.startswith("video/"):
            video_format = _VIDEO_FORMATS.get(content_type)
            if not video_format:
                return None
            return {"video": {"format": video_format, "source": source}}
        doc_format = _DOCUMENT_FORMATS.get(content_type)
        if not doc_format:
            return None
        return {
            "document": {
                "name": name or "document",
                "format": doc_format,
                "source": source,
            }
        }
    @staticmethod
    def _get_image_format(content_type: str) -> str:
        """Derive the Bedrock image format from a MIME type.
        Args:
            content_type: MIME type.
        Returns:
            The text after the last "/", with "jpg" rewritten to "jpeg".
        """
        subtype = content_type.rsplit("/", 1)[-1]
        return "jpeg" if subtype == "jpg" else subtype
--- a/lib/crewai-files/src/crewai_files/formatting/gemini.py
+++ b/lib/crewai-files/src/crewai_files/formatting/gemini.py
@@ -0,0 +1,66 @@
 """Gemini content block formatter."""
 from __future__ import annotations
 import base64
 from typing import Any
 from crewai_files.core.resolved import (
    FileReference,
    InlineBase64,
    ResolvedFile,
    UrlReference,
 )
 from crewai_files.core.types import FileInput
 class GeminiFormatter:
    """Builds Gemini content blocks from resolved files."""
    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into a Gemini content block.
        A FileReference with a file URI and a UrlReference both become
        ``fileData`` blocks; every other resolution becomes ``inlineData``
        carrying base64 text.
        Args:
            file: Original file input with metadata.
            resolved: Resolved file.
        Returns:
            Content block dict.
        """
        content_type = file.content_type
        if isinstance(resolved, FileReference) and resolved.file_uri:
            return {
                "fileData": {
                    "mimeType": resolved.content_type,
                    "fileUri": resolved.file_uri,
                }
            }
        if isinstance(resolved, UrlReference):
            return {"fileData": {"mimeType": content_type, "fileUri": resolved.url}}
        if isinstance(resolved, InlineBase64):
            mime_type, payload = resolved.content_type, resolved.data
        else:
            # Fallback: encode the file's own bytes.
            mime_type = content_type
            payload = base64.b64encode(file.read()).decode("ascii")
        return {"inlineData": {"mimeType": mime_type, "data": payload}}
--- a/lib/crewai-files/src/crewai_files/formatting/openai.py
+++ b/lib/crewai-files/src/crewai_files/formatting/openai.py
@@ -0,0 +1,60 @@
 """OpenAI content block formatter."""
 from __future__ import annotations
 import base64
 from typing import Any
 from crewai_files.core.resolved import (
    FileReference,
    InlineBase64,
    ResolvedFile,
    UrlReference,
 )
 from crewai_files.core.types import FileInput
 class OpenAIFormatter:
    """Builds OpenAI content blocks from resolved files."""
    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into an OpenAI content block.
        A FileReference becomes a ``file`` block keyed by file id. Every
        other resolution becomes an ``image_url`` block whose URL is either
        the remote URL or a base64 data URL.
        Args:
            file: Original file input with metadata.
            resolved: Resolved file.
        Returns:
            Content block dict.
        """
        content_type = file.content_type
        if isinstance(resolved, FileReference):
            return {"type": "file", "file": {"file_id": resolved.file_id}}
        if isinstance(resolved, UrlReference):
            url = resolved.url
        elif isinstance(resolved, InlineBase64):
            url = f"data:{resolved.content_type};base64,{resolved.data}"
        else:
            # Fallback: encode the file's own bytes.
            encoded = base64.b64encode(file.read()).decode("ascii")
            url = f"data:{content_type};base64,{encoded}"
        return {"type": "image_url", "image_url": {"url": url}}
--- a/lib/crewai/src/crewai/agents/crew_agent_executor.py
+++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -10,7 +10,7 @@ from collections.abc import Callable
 import logging
 from typing import TYPE_CHECKING, Any, Literal, cast
-from crewai_files import FileProcessor
+from crewai_files import aformat_multimodal_content, format_multimodal_content
 from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
 from pydantic_core import CoreSchema, core_schema
@@ -220,9 +220,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        """Inject files as multimodal content into messages.
        For crews with input files and LLMs that support multimodal,
-        processes files according to provider constraints and file handling mode,
+        uses crewai_files to process, resolve, and format files into
-        then delegates to the LLM's format_multimodal_content method to
+        provider-specific content blocks.
        generate provider-specific content blocks.
        """
        if not self.crew or not self.task:
            return
@@ -235,15 +234,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            return
        provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
-        processor = FileProcessor(constraints=provider)
+        content_blocks = format_multimodal_content(files, provider)
        files = processor.process_files(files)
        from crewai_files import get_upload_cache
        upload_cache = get_upload_cache()
        content_blocks = self.llm.format_multimodal_content(
            files, upload_cache=upload_cache
        )
        if not content_blocks:
            return
@@ -262,9 +254,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
        """Async inject files as multimodal content into messages.
        For crews with input files and LLMs that support multimodal,
-        processes files according to provider constraints using parallel processing,
+        uses crewai_files to process, resolve, and format files into
-        then delegates to the LLM's aformat_multimodal_content method to
+        provider-specific content blocks with parallel file resolution.
        generate provider-specific content blocks with parallel file resolution.
        """
        if not self.crew or not self.task:
            return
@@ -277,15 +268,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
            return
        provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
-        processor = FileProcessor(constraints=provider)
+        content_blocks = await aformat_multimodal_content(files, provider)
        files = await processor.aprocess_files(files)
        from crewai_files import get_upload_cache
        upload_cache = get_upload_cache()
        content_blocks = await self.llm.aformat_multimodal_content(
            files, upload_cache=upload_cache
        )
        if not content_blocks:
            return
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -53,7 +53,6 @@ from crewai.utilities.logger_utils import suppress_warnings
 if TYPE_CHECKING:
    from crewai_files import FileInput, UploadCache
    from litellm.exceptions import ContextWindowExceededError
    from litellm.litellm_core_utils.get_supported_openai_params import (
        get_supported_openai_params,
@@ -2254,66 +2253,3 @@ class LLM(BaseLLM):
        if "claude-3" in model_lower or "claude-4" in model_lower:
            return ["image/", "application/pdf"]
        return ["image/"]
    def format_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Render files as ``image_url`` data-URL blocks for litellm.
        Files whose content type matches none of the prefixes returned by
        ``supported_multimodal_content_types`` are skipped. Each remaining
        file is resolved with uploads disabled and emitted as a base64 data
        URL.
        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache, handed to the FileResolver.
        Returns:
            List of content blocks in OpenAI's expected format; empty when
            the model does not support multimodal input.
        """
        import base64
        from crewai_files import (
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )
        if not self.supports_multimodal():
            return []
        allowed_prefixes = self.supported_multimodal_content_types()
        # LiteLLM uses OpenAI-compatible format
        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )
        blocks: list[dict[str, Any]] = []
        for file_input in files.values():
            content_type = file_input.content_type
            if not any(content_type.startswith(p) for p in allowed_prefixes):
                continue
            resolved = resolver.resolve(file_input, "openai")
            if isinstance(resolved, InlineBase64):
                mime_type, payload = resolved.content_type, resolved.data
            else:
                # Fallback to direct base64 encoding
                mime_type = content_type
                payload = base64.b64encode(file_input.read()).decode("ascii")
            blocks.append(
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{payload}"},
                }
            )
        return blocks
--- a/lib/crewai/src/crewai/llms/base_llm.py
+++ b/lib/crewai/src/crewai/llms/base_llm.py
@@ -32,8 +32,6 @@ from crewai.types.usage_metrics import UsageMetrics
 if TYPE_CHECKING:
    from crewai_files import FileInput, UploadCache
    from crewai.agent.core import Agent
    from crewai.task import Task
    from crewai.tools.base_tool import BaseTool
@@ -298,43 +296,6 @@ class BaseLLM(ABC):
        """
        return []
    def format_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Return the multimodal content blocks for ``files``.
        This base implementation ignores both arguments and produces no
        blocks; provider subclasses are expected to override it.
        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.
        Returns:
            An empty list.
        """
        no_blocks: list[dict[str, Any]] = []
        return no_blocks
    async def aformat_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Async entry point for formatting multimodal content blocks.
        This default simply delegates to the synchronous
        ``format_multimodal_content``; subclasses may override it to resolve
        files asynchronously.
        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.
        Returns:
            Whatever the synchronous method returns for the same arguments.
        """
        sync_blocks = self.format_multimodal_content(files, upload_cache)
        return sync_blocks
    def format_text_content(self, text: str) -> dict[str, Any]:
        """Format text as a content block for the LLM.
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 import base64
 import json
 import logging
 import os
@@ -20,12 +19,8 @@ from crewai.utilities.types import LLMMessage
 if TYPE_CHECKING:
    from crewai_files import FileInput, UploadCache
    from crewai.llms.hooks.base import BaseInterceptor
 DEFAULT_CACHE_TTL = "ephemeral"
 try:
    from anthropic import Anthropic, AsyncAnthropic
    from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
@@ -1256,222 +1251,3 @@ class AnthropicCompletion(BaseLLM):
        if not self.supports_multimodal():
            return []
        return ["image/", "application/pdf"]
    def format_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
        enable_caching: bool = True,
        cache_ttl: str | None = None,
    ) -> list[dict[str, Any]]:
        """Format files as Anthropic multimodal content blocks.

        Each supported file is resolved through ``FileResolver`` and emitted
        as an ``image`` or ``document`` block whose source is either a file
        reference (``file_id``) or inline base64 data. When caching is
        enabled, a ``cache_control`` marker is attached to the last block
        that is actually emitted.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.
            enable_caching: Whether to add a cache_control marker (default: True).
            cache_ttl: Cache lifetime. ``None`` or "ephemeral" emits the plain
                ephemeral marker; any other value (e.g. "1h") is sent as the
                marker's ``ttl`` field.

        Returns:
            List of content blocks in Anthropic's expected format. Empty when
            the model does not support multimodal input.
        """
        if not self.supports_multimodal():
            return []

        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        supported_types = self.supported_multimodal_content_types()
        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )

        content_blocks: list[dict[str, Any]] = []
        for file_input in files.values():
            content_type = file_input.content_type
            if not any(content_type.startswith(t) for t in supported_types):
                continue

            resolved = resolver.resolve(file_input, "anthropic")

            if content_type.startswith("image/"):
                block_type = "image"
            elif content_type == "application/pdf":
                block_type = "document"
            else:
                # Passed the prefix filter but maps to no Anthropic block type.
                continue

            source: dict[str, Any]
            if isinstance(resolved, FileReference):
                source = {"type": "file", "file_id": resolved.file_id}
            elif isinstance(resolved, InlineBase64):
                source = {
                    "type": "base64",
                    "media_type": resolved.content_type,
                    "data": resolved.data,
                }
            else:
                # Unknown resolution result: fall back to encoding the raw bytes.
                source = {
                    "type": "base64",
                    "media_type": content_type,
                    "data": base64.b64encode(file_input.read()).decode("ascii"),
                }

            content_blocks.append({"type": block_type, "source": source})

        # Mark the last *emitted* block. Indexing by position in ``files``
        # loses the marker whenever the final file is skipped.
        if enable_caching and content_blocks:
            # The marker type is always the ephemeral one; a non-default
            # lifetime travels in the separate ``ttl`` field.
            cache_control: dict[str, str] = {"type": DEFAULT_CACHE_TTL}
            if cache_ttl and cache_ttl != DEFAULT_CACHE_TTL:
                cache_control["ttl"] = cache_ttl
            content_blocks[-1]["cache_control"] = cache_control

        return content_blocks
    async def aformat_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
        enable_caching: bool = True,
        cache_ttl: str | None = None,
    ) -> list[dict[str, Any]]:
        """Async format files as Anthropic multimodal content blocks.

        Supported files are resolved in one ``aresolve_files`` call and then
        emitted, in input order, as ``image`` or ``document`` blocks. Files
        missing from the resolved mapping, or resolved to an unrecognised
        result type, are skipped. When caching is enabled, a
        ``cache_control`` marker is attached to the last block that is
        actually emitted.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.
            enable_caching: Whether to add a cache_control marker (default: True).
            cache_ttl: Cache lifetime. ``None`` or "ephemeral" emits the plain
                ephemeral marker; any other value (e.g. "1h") is sent as the
                marker's ``ttl`` field.

        Returns:
            List of content blocks in Anthropic's expected format. Empty when
            the model does not support multimodal input or no file qualifies.
        """
        if not self.supports_multimodal():
            return []

        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        supported_types = self.supported_multimodal_content_types()
        supported_files = {
            name: f
            for name, f in files.items()
            if any(f.content_type.startswith(t) for t in supported_types)
        }
        if not supported_files:
            return []

        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )
        resolved_files = await resolver.aresolve_files(supported_files, "anthropic")

        content_blocks: list[dict[str, Any]] = []
        for name, file_input in supported_files.items():
            if name not in resolved_files:
                continue
            resolved = resolved_files[name]
            content_type = file_input.content_type

            if content_type.startswith("image/"):
                block_type = "image"
            elif content_type == "application/pdf":
                block_type = "document"
            else:
                # Passed the prefix filter but maps to no Anthropic block type.
                continue

            source: dict[str, Any]
            if isinstance(resolved, FileReference):
                source = {"type": "file", "file_id": resolved.file_id}
            elif isinstance(resolved, InlineBase64):
                source = {
                    "type": "base64",
                    "media_type": resolved.content_type,
                    "data": resolved.data,
                }
            else:
                continue

            content_blocks.append({"type": block_type, "source": source})

        # Mark the last *emitted* block. Comparing the position in
        # ``supported_files`` against ``len(resolved_files)`` misplaces the
        # marker as soon as any file is skipped.
        if enable_caching and content_blocks:
            # The marker type is always the ephemeral one; a non-default
            # lifetime travels in the separate ``ttl`` field.
            cache_control: dict[str, str] = {"type": DEFAULT_CACHE_TTL}
            if cache_ttl and cache_ttl != DEFAULT_CACHE_TTL:
                cache_control["ttl"] = cache_ttl
            content_blocks[-1]["cache_control"] = cache_control

        return content_blocks
--- a/lib/crewai/src/crewai/llms/providers/azure/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 import base64
 import json
 import logging
 import os
@@ -18,8 +17,6 @@ from crewai.utilities.types import LLMMessage
 if TYPE_CHECKING:
    from crewai_files import FileInput, UploadCache
    from crewai.llms.hooks.base import BaseInterceptor
@@ -1040,115 +1037,3 @@ class AzureCompletion(BaseLLM):
        if not self.supports_multimodal():
            return []
        return ["image/"]
    def format_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Format files as Azure OpenAI multimodal content blocks.

        Every supported file becomes an ``image_url`` block carrying a base64
        data URL, the same shape OpenAI uses. The payload comes from the
        resolver when it yields inline base64; otherwise the file is read and
        encoded directly.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache, forwarded to the resolver.

        Returns:
            List of content blocks in Azure OpenAI's expected format. Empty
            when the model does not support multimodal input.
        """
        if not self.supports_multimodal():
            return []

        from crewai_files import (
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        accepted_prefixes = self.supported_multimodal_content_types()
        # Azure doesn't support file uploads for images, so always go inline.
        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )

        blocks: list[dict[str, Any]] = []
        for candidate in files.values():
            mime = candidate.content_type
            if any(mime.startswith(prefix) for prefix in accepted_prefixes):
                outcome = resolver.resolve(candidate, "azure")
                if isinstance(outcome, InlineBase64):
                    data_url = f"data:{outcome.content_type};base64,{outcome.data}"
                else:
                    # Resolver returned something else: encode the raw bytes.
                    encoded = base64.b64encode(candidate.read()).decode("ascii")
                    data_url = f"data:{mime};base64,{encoded}"
                blocks.append({"type": "image_url", "image_url": {"url": data_url}})
        return blocks
    async def aformat_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Async format files as Azure OpenAI multimodal content blocks.

        Supported files are resolved in a single ``aresolve_files`` call.
        Only results that come back as inline base64 are emitted, each as an
        ``image_url`` block with a data URL.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache, forwarded to the resolver.

        Returns:
            List of content blocks in Azure OpenAI's expected format. Empty
            when the model does not support multimodal input or no file
            qualifies.
        """
        if not self.supports_multimodal():
            return []

        from crewai_files import (
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        accepted_prefixes = self.supported_multimodal_content_types()
        eligible: dict[str, FileInput] = {}
        for label, candidate in files.items():
            if any(candidate.content_type.startswith(p) for p in accepted_prefixes):
                eligible[label] = candidate
        if not eligible:
            return []

        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )
        outcomes = await resolver.aresolve_files(eligible, "azure")

        blocks: list[dict[str, Any]] = []
        for outcome in outcomes.values():
            if not isinstance(outcome, InlineBase64):
                continue
            data_url = f"data:{outcome.content_type};base64,{outcome.data}"
            blocks.append({"type": "image_url", "image_url": {"url": data_url}})
        return blocks
--- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
@@ -20,7 +20,6 @@ from crewai.utilities.types import LLMMessage
 if TYPE_CHECKING:
    from crewai_files import FileInput, UploadCache
    from mypy_boto3_bedrock_runtime.type_defs import (
        GuardrailConfigurationTypeDef,
        GuardrailStreamConfigurationTypeDef,
@@ -1563,260 +1562,3 @@ class BedrockCompletion(BaseLLM):
            "video/3gpp": "three_gp",
        }
        return format_map.get(content_type)
    def format_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Format files as Bedrock Converse API multimodal content blocks.

        Each file is resolved first. A file reference with a URI becomes an
        ``s3Location`` source; anything else is sent as raw bytes. The block
        kind (``image``, ``video`` or ``document``) is then chosen from the
        content type. Video and document files whose format is not
        recognised by the format helpers are dropped.

        Uploads are only preferred when ``CREWAI_BEDROCK_S3_BUCKET`` is set
        and the model is a Nova model.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for S3 uploads.

        Returns:
            List of content blocks in Bedrock's expected format. Empty when
            the model does not support multimodal input.
        """
        if not self.supports_multimodal():
            return []

        import os

        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBytes,
        )

        nova_model = self._is_nova_model()
        bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
        bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
        resolver = FileResolver(
            config=FileResolverConfig(
                prefer_upload=nova_model if bucket else False,
                use_bytes_for_bedrock=True,
            ),
            upload_cache=upload_cache,
        )

        blocks: list[dict[str, Any]] = []
        for label, candidate in files.items():
            mime = candidate.content_type
            outcome = resolver.resolve(candidate, "bedrock")

            # Step 1: decide where the payload lives (S3 reference or bytes).
            if isinstance(outcome, FileReference) and outcome.file_uri:
                location: dict[str, Any] = {"uri": outcome.file_uri}
                if bucket_owner:
                    location["bucketOwner"] = bucket_owner
                source: dict[str, Any] = {"s3Location": location}
            elif isinstance(outcome, InlineBytes):
                source = {"bytes": outcome.data}
            else:
                source = {"bytes": candidate.read()}

            # Step 2: wrap the source in the block kind matching the type.
            if mime.startswith("image/"):
                image_format = mime.rpartition("/")[2]
                if image_format == "jpg":
                    image_format = "jpeg"
                blocks.append({"image": {"format": image_format, "source": source}})
                continue

            if mime.startswith("video/"):
                video_format = self._get_video_format(mime)
                if video_format:
                    blocks.append(
                        {"video": {"format": video_format, "source": source}}
                    )
                continue

            document_format = self._get_document_format(mime)
            if document_format:
                blocks.append(
                    {
                        "document": {
                            "name": label,
                            "format": document_format,
                            "source": source,
                        }
                    }
                )
        return blocks
    async def aformat_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Async format files as Bedrock Converse API multimodal content blocks.

        All files are resolved in a single ``aresolve_files`` call. For each
        result, a file reference with a URI becomes an ``s3Location`` source;
        anything else is sent as raw bytes. The block kind (``image``,
        ``video`` or ``document``) is then chosen from the content type.
        Video and document files whose format is not recognised by the
        format helpers are dropped.

        Uploads are only preferred when ``CREWAI_BEDROCK_S3_BUCKET`` is set
        and the model is a Nova model.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for S3 uploads.

        Returns:
            List of content blocks in Bedrock's expected format. Empty when
            the model does not support multimodal input.
        """
        if not self.supports_multimodal():
            return []

        import os

        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBytes,
        )

        nova_model = self._is_nova_model()
        bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
        bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
        resolver = FileResolver(
            config=FileResolverConfig(
                prefer_upload=nova_model if bucket else False,
                use_bytes_for_bedrock=True,
            ),
            upload_cache=upload_cache,
        )
        outcomes = await resolver.aresolve_files(files, "bedrock")

        blocks: list[dict[str, Any]] = []
        for label, outcome in outcomes.items():
            candidate = files[label]
            mime = candidate.content_type

            # Step 1: decide where the payload lives (S3 reference or bytes).
            if isinstance(outcome, FileReference) and outcome.file_uri:
                location: dict[str, Any] = {"uri": outcome.file_uri}
                if bucket_owner:
                    location["bucketOwner"] = bucket_owner
                source: dict[str, Any] = {"s3Location": location}
            elif isinstance(outcome, InlineBytes):
                source = {"bytes": outcome.data}
            else:
                source = {"bytes": await candidate.aread()}

            # Step 2: wrap the source in the block kind matching the type.
            if mime.startswith("image/"):
                image_format = mime.rpartition("/")[2]
                if image_format == "jpg":
                    image_format = "jpeg"
                blocks.append({"image": {"format": image_format, "source": source}})
                continue

            if mime.startswith("video/"):
                video_format = self._get_video_format(mime)
                if video_format:
                    blocks.append(
                        {"video": {"format": video_format, "source": source}}
                    )
                continue

            document_format = self._get_document_format(mime)
            if document_format:
                blocks.append(
                    {
                        "document": {
                            "name": label,
                            "format": document_format,
                            "source": source,
                        }
                    }
                )
        return blocks
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -19,11 +19,6 @@ from crewai.utilities.types import LLMMessage
 if TYPE_CHECKING:
    from crewai_files import (
        FileInput,
        UploadCache,
    )
    from crewai.llms.hooks.base import BaseInterceptor
@@ -1097,138 +1092,6 @@ class GeminiCompletion(BaseLLM):
        """
        return ["image/", "audio/", "video/", "application/pdf", "text/"]
    def format_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Format files as Gemini multimodal content blocks.

        Each supported file is resolved through ``FileResolver``. A file
        reference carrying a URI becomes a ``fileData`` block and inline
        base64 becomes an ``inlineData`` block. Any other result falls back
        to reading the file and base64-encoding it into ``inlineData``.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.

        Returns:
            List of content blocks in Gemini's expected format.
        """
        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        accepted_prefixes = self.supported_multimodal_content_types()
        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )

        parts: list[dict[str, Any]] = []
        for candidate in files.values():
            mime = candidate.content_type
            if not any(mime.startswith(prefix) for prefix in accepted_prefixes):
                continue

            outcome = resolver.resolve(candidate, "gemini")
            if isinstance(outcome, FileReference) and outcome.file_uri:
                # Uploaded file: point at it by URI.
                part: dict[str, Any] = {
                    "fileData": {
                        "mimeType": outcome.content_type,
                        "fileUri": outcome.file_uri,
                    }
                }
            elif isinstance(outcome, InlineBase64):
                part = {
                    "inlineData": {
                        "mimeType": outcome.content_type,
                        "data": outcome.data,
                    }
                }
            else:
                # Anything else: encode the raw bytes ourselves.
                encoded = base64.b64encode(candidate.read()).decode("ascii")
                part = {"inlineData": {"mimeType": mime, "data": encoded}}
            parts.append(part)
        return parts
    async def aformat_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Async format files as Gemini multimodal content blocks.

        Supported files are resolved in a single ``aresolve_files`` call. A
        file reference carrying a URI becomes a ``fileData`` block and inline
        base64 becomes an ``inlineData`` block; other results are skipped.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.

        Returns:
            List of content blocks in Gemini's expected format. Empty when no
            file has a supported content type.
        """
        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        accepted_prefixes = self.supported_multimodal_content_types()
        eligible: dict[str, FileInput] = {}
        for label, candidate in files.items():
            if any(candidate.content_type.startswith(p) for p in accepted_prefixes):
                eligible[label] = candidate
        if not eligible:
            return []

        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )
        outcomes = await resolver.aresolve_files(eligible, "gemini")

        parts: list[dict[str, Any]] = []
        for outcome in outcomes.values():
            if isinstance(outcome, FileReference) and outcome.file_uri:
                payload_key = "fileData"
                payload = {
                    "mimeType": outcome.content_type,
                    "fileUri": outcome.file_uri,
                }
            elif isinstance(outcome, InlineBase64):
                payload_key = "inlineData"
                payload = {
                    "mimeType": outcome.content_type,
                    "data": outcome.data,
                }
            else:
                continue
            parts.append({payload_key: payload})
        return parts
    def format_text_content(self, text: str) -> dict[str, Any]:
        """Format text as a Gemini content block.
--- a/lib/crewai/src/crewai/llms/providers/openai/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 import base64
 from collections.abc import AsyncIterator
 import json
 import logging
@@ -27,8 +26,6 @@ from crewai.utilities.types import LLMMessage
 if TYPE_CHECKING:
    from crewai_files import FileInput, UploadCache
    from crewai.agent.core import Agent
    from crewai.llms.hooks.base import BaseInterceptor
    from crewai.task import Task
@@ -1080,136 +1077,3 @@ class OpenAICompletion(BaseLLM):
        if not self.supports_multimodal():
            return []
        return ["image/"]
    def format_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Format files as OpenAI multimodal content blocks.

        Each supported file is resolved through ``FileResolver``. A file
        reference becomes a ``file`` block carrying its ``file_id``. Inline
        base64 becomes an ``image_url`` block with a data URL. Any other
        result falls back to reading the file and base64-encoding it into an
        ``image_url`` block.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.

        Returns:
            List of content blocks in OpenAI's expected format. Empty when
            the model does not support multimodal input.
        """
        if not self.supports_multimodal():
            return []

        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        accepted_prefixes = self.supported_multimodal_content_types()
        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )

        blocks: list[dict[str, Any]] = []
        for candidate in files.values():
            mime = candidate.content_type
            if not any(mime.startswith(prefix) for prefix in accepted_prefixes):
                continue

            outcome = resolver.resolve(candidate, "openai")
            if isinstance(outcome, FileReference):
                blocks.append({"type": "file", "file": {"file_id": outcome.file_id}})
                continue

            if isinstance(outcome, InlineBase64):
                data_url = f"data:{outcome.content_type};base64,{outcome.data}"
            else:
                # Unknown resolution result: encode the raw bytes ourselves.
                encoded = base64.b64encode(candidate.read()).decode("ascii")
                data_url = f"data:{mime};base64,{encoded}"
            blocks.append({"type": "image_url", "image_url": {"url": data_url}})
        return blocks
    async def aformat_multimodal_content(
        self,
        files: dict[str, FileInput],
        upload_cache: UploadCache | None = None,
    ) -> list[dict[str, Any]]:
        """Async format files as OpenAI multimodal content blocks.

        Supported files are resolved in a single ``aresolve_files`` call. A
        file reference becomes a ``file`` block carrying its ``file_id`` and
        inline base64 becomes an ``image_url`` block with a data URL; other
        results are skipped.

        Args:
            files: Dictionary mapping file names to FileInput objects.
            upload_cache: Optional cache for tracking uploaded files.

        Returns:
            List of content blocks in OpenAI's expected format. Empty when
            the model does not support multimodal input or no file qualifies.
        """
        if not self.supports_multimodal():
            return []

        from crewai_files import (
            FileReference,
            FileResolver,
            FileResolverConfig,
            InlineBase64,
        )

        accepted_prefixes = self.supported_multimodal_content_types()
        eligible: dict[str, FileInput] = {}
        for label, candidate in files.items():
            if any(candidate.content_type.startswith(p) for p in accepted_prefixes):
                eligible[label] = candidate
        if not eligible:
            return []

        resolver = FileResolver(
            config=FileResolverConfig(prefer_upload=False),
            upload_cache=upload_cache,
        )
        outcomes = await resolver.aresolve_files(eligible, "openai")

        blocks: list[dict[str, Any]] = []
        for outcome in outcomes.values():
            if isinstance(outcome, FileReference):
                blocks.append({"type": "file", "file": {"file_id": outcome.file_id}})
            elif isinstance(outcome, InlineBase64):
                data_url = f"data:{outcome.content_type};base64,{outcome.data}"
                blocks.append({"type": "image_url", "image_url": {"url": data_url}})
        return blocks
--- a/uv.lock
+++ b/uv.lock
@@ -1345,7 +1345,6 @@ requires-dist = [
 [[package]]
 name = "crewai-files"
 version = "0.1.0"
 source = { editable = "lib/crewai-files" }
 dependencies = [
    { name = "aiocache" },