refactor: centralize multimodal formatting in crewai_files

This commit is contained in:
Greyson LaLonde
2026-01-22 15:59:55 -05:00
parent b95a3a9bc8
commit ca07114bcf
18 changed files with 742 additions and 1002 deletions

View File

@@ -10,7 +10,7 @@ from collections.abc import Callable
import logging
from typing import TYPE_CHECKING, Any, Literal, cast
from crewai_files import FileProcessor
from crewai_files import aformat_multimodal_content, format_multimodal_content
from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
from pydantic_core import CoreSchema, core_schema
@@ -220,9 +220,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
"""Inject files as multimodal content into messages.
For crews with input files and LLMs that support multimodal,
processes files according to provider constraints and file handling mode,
then delegates to the LLM's format_multimodal_content method to
generate provider-specific content blocks.
uses crewai_files to process, resolve, and format files into
provider-specific content blocks.
"""
if not self.crew or not self.task:
return
@@ -235,15 +234,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
return
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
processor = FileProcessor(constraints=provider)
files = processor.process_files(files)
content_blocks = format_multimodal_content(files, provider)
from crewai_files import get_upload_cache
upload_cache = get_upload_cache()
content_blocks = self.llm.format_multimodal_content(
files, upload_cache=upload_cache
)
if not content_blocks:
return
@@ -262,9 +254,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
"""Async inject files as multimodal content into messages.
For crews with input files and LLMs that support multimodal,
processes files according to provider constraints using parallel processing,
then delegates to the LLM's aformat_multimodal_content method to
generate provider-specific content blocks with parallel file resolution.
uses crewai_files to process, resolve, and format files into
provider-specific content blocks with parallel file resolution.
"""
if not self.crew or not self.task:
return
@@ -277,15 +268,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
return
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
processor = FileProcessor(constraints=provider)
files = await processor.aprocess_files(files)
content_blocks = await aformat_multimodal_content(files, provider)
from crewai_files import get_upload_cache
upload_cache = get_upload_cache()
content_blocks = await self.llm.aformat_multimodal_content(
files, upload_cache=upload_cache
)
if not content_blocks:
return

View File

@@ -53,7 +53,6 @@ from crewai.utilities.logger_utils import suppress_warnings
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from litellm.exceptions import ContextWindowExceededError
from litellm.litellm_core_utils.get_supported_openai_params import (
get_supported_openai_params,
@@ -2254,66 +2253,3 @@ class LLM(BaseLLM):
if "claude-3" in model_lower or "claude-4" in model_lower:
return ["image/", "application/pdf"]
return ["image/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as multimodal content blocks for litellm.
Uses OpenAI-compatible format which litellm translates to provider format.
Uses FileResolver for consistent base64 encoding.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache (not used by litellm but kept for interface consistency).
Returns:
List of content blocks in OpenAI's expected format.
"""
# Imports are local so crewai_files stays an optional dependency until
# multimodal content is actually formatted.
import base64
from crewai_files import (
FileResolver,
FileResolverConfig,
InlineBase64,
)
# Non-multimodal models contribute no content blocks.
if not self.supports_multimodal():
return []
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
# LiteLLM uses OpenAI-compatible format
# prefer_upload=False: always inline; litellm has no Files API path here.
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
# Skip files whose MIME type matches none of the supported prefixes.
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "openai")
# NOTE(review): supported types can include "application/pdf" for
# claude models, yet every block is emitted as "image_url" —
# presumably litellm translates; confirm PDFs survive this path.
if isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
else:
# Fallback to direct base64 encoding
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
)
return content_blocks

View File

@@ -32,8 +32,6 @@ from crewai.types.usage_metrics import UsageMetrics
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.agent.core import Agent
from crewai.task import Task
from crewai.tools.base_tool import BaseTool
@@ -298,43 +296,6 @@ class BaseLLM(ABC):
"""
return []
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Format files as provider-specific multimodal content blocks.

    The base implementation emits nothing; providers that accept
    multimodal input override this with their own block format.

    Args:
        files: Mapping of file names to FileInput objects.
        upload_cache: Optional cache for tracking uploaded files.

    Returns:
        An empty list. Overrides return blocks in the provider's format.
    """
    return []

async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Async variant of :meth:`format_multimodal_content`.

    By default this simply delegates to the synchronous method;
    providers override it to resolve files in parallel.

    Args:
        files: Mapping of file names to FileInput objects.
        upload_cache: Optional cache for tracking uploaded files.

    Returns:
        Content blocks in the provider's expected format.
    """
    return self.format_multimodal_content(files=files, upload_cache=upload_cache)
def format_text_content(self, text: str) -> dict[str, Any]:
"""Format text as a content block for the LLM.

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
import json
import logging
import os
@@ -20,12 +19,8 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.llms.hooks.base import BaseInterceptor
DEFAULT_CACHE_TTL = "ephemeral"
try:
from anthropic import Anthropic, AsyncAnthropic
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
@@ -1256,222 +1251,3 @@ class AnthropicCompletion(BaseLLM):
if not self.supports_multimodal():
return []
return ["image/", "application/pdf"]
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
    enable_caching: bool = True,
    cache_ttl: str | None = None,
) -> list[dict[str, Any]]:
    """Format files as Anthropic multimodal content blocks.

    Anthropic supports both base64 inline format and file references via
    the Files API; FileResolver chooses the delivery method per file.
    When caching is enabled, a cache_control marker is attached to the
    last emitted block so the preceding file content can be prompt-cached.

    Args:
        files: Dictionary mapping file names to FileInput objects.
        upload_cache: Optional cache for tracking uploaded files.
        enable_caching: Whether to add a cache_control marker (default: True).
        cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models).

    Returns:
        List of content blocks in Anthropic's expected format.
    """
    if not self.supports_multimodal():
        return []
    # Local import keeps crewai_files optional until multimodal is used.
    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    content_blocks: list[dict[str, Any]] = []
    supported_types = self.supported_multimodal_content_types()
    config = FileResolverConfig(prefer_upload=False)
    resolver = FileResolver(config=config, upload_cache=upload_cache)
    for file_input in files.values():
        content_type = file_input.content_type
        # Only image/* and application/pdf are formatted; others are skipped.
        if not any(content_type.startswith(t) for t in supported_types):
            continue
        resolved = resolver.resolve(file_input, "anthropic")
        block: dict[str, Any] = {}
        if isinstance(resolved, FileReference):
            source: dict[str, Any] = {"type": "file", "file_id": resolved.file_id}
        elif isinstance(resolved, InlineBase64):
            source = {
                "type": "base64",
                "media_type": resolved.content_type,
                "data": resolved.data,
            }
        else:
            # Fallback: encode raw bytes directly when the resolver
            # returns an unrecognized result type.
            data = base64.b64encode(file_input.read()).decode("ascii")
            source = {
                "type": "base64",
                "media_type": content_type,
                "data": data,
            }
        if content_type.startswith("image/"):
            block = {"type": "image", "source": source}
        elif content_type == "application/pdf":
            block = {"type": "document", "source": source}
        if block:
            content_blocks.append(block)
    # Bug fix: the previous index check (i == num_files - 1) counted ALL
    # input files, so if the final file was filtered out as unsupported no
    # block ever received cache_control. Mark the last *emitted* block.
    if enable_caching and content_blocks:
        content_blocks[-1]["cache_control"] = {"type": cache_ttl or DEFAULT_CACHE_TTL}
    return content_blocks
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
    enable_caching: bool = True,
    cache_ttl: str | None = None,
) -> list[dict[str, Any]]:
    """Async format files as Anthropic multimodal content blocks.

    Uses parallel file resolution for improved performance with multiple
    files. When caching is enabled, a cache_control marker is attached to
    the last emitted block so the file prefix can be prompt-cached.

    Args:
        files: Dictionary mapping file names to FileInput objects.
        upload_cache: Optional cache for tracking uploaded files.
        enable_caching: Whether to add a cache_control marker (default: True).
        cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models).

    Returns:
        List of content blocks in Anthropic's expected format.
    """
    if not self.supports_multimodal():
        return []
    # Local import keeps crewai_files optional until multimodal is used.
    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    supported_types = self.supported_multimodal_content_types()
    supported_files = {
        name: f
        for name, f in files.items()
        if any(f.content_type.startswith(t) for t in supported_types)
    }
    if not supported_files:
        return []
    config = FileResolverConfig(prefer_upload=False)
    resolver = FileResolver(config=config, upload_cache=upload_cache)
    resolved_files = await resolver.aresolve_files(supported_files, "anthropic")
    content_blocks: list[dict[str, Any]] = []
    # Iterate in input order; files the resolver could not resolve are skipped.
    for name, file_input in supported_files.items():
        resolved = resolved_files.get(name)
        if resolved is None:
            continue
        content_type = file_input.content_type
        block: dict[str, Any] = {}
        if isinstance(resolved, FileReference):
            source: dict[str, Any] = {"type": "file", "file_id": resolved.file_id}
        elif isinstance(resolved, InlineBase64):
            source = {
                "type": "base64",
                "media_type": resolved.content_type,
                "data": resolved.data,
            }
        else:
            continue
        if content_type.startswith("image/"):
            block = {"type": "image", "source": source}
        elif content_type == "application/pdf":
            block = {"type": "document", "source": source}
        if block:
            content_blocks.append(block)
    # Bug fix: the previous check compared an index over file_names against
    # len(resolved_files); when resolution dropped files, the cache marker
    # landed on a middle block or none. Mark the last *emitted* block.
    if enable_caching and content_blocks:
        content_blocks[-1]["cache_control"] = {"type": cache_ttl or DEFAULT_CACHE_TTL}
    return content_blocks

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
import json
import logging
import os
@@ -18,8 +17,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.llms.hooks.base import BaseInterceptor
@@ -1040,115 +1037,3 @@ class AzureCompletion(BaseLLM):
if not self.supports_multimodal():
return []
return ["image/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as Azure OpenAI multimodal content blocks.
Azure OpenAI uses the same image_url format as OpenAI.
Uses FileResolver for consistent base64 encoding.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache (not used by Azure but kept for interface consistency).
Returns:
List of content blocks in Azure OpenAI's expected format.
"""
# Non-multimodal deployments produce no blocks.
if not self.supports_multimodal():
return []
# Local import keeps crewai_files optional until multimodal is used.
from crewai_files import (
FileResolver,
FileResolverConfig,
InlineBase64,
)
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
# Azure doesn't support file uploads for images, so just use inline
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
# Skip files whose MIME type matches none of the supported prefixes.
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "azure")
if isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
else:
# Fallback to direct base64 encoding
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as Azure OpenAI multimodal content blocks.
Uses parallel file resolution for improved performance with multiple files.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache (not used by Azure but kept for interface consistency).
Returns:
List of content blocks in Azure OpenAI's expected format.
"""
if not self.supports_multimodal():
return []
# Local import keeps crewai_files optional until multimodal is used.
from crewai_files import (
FileResolver,
FileResolverConfig,
InlineBase64,
)
supported_types = self.supported_multimodal_content_types()
# Pre-filter to supported MIME prefixes before resolving in parallel.
supported_files = {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
if not supported_files:
return []
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, "azure")
# NOTE(review): unlike the sync version, results that are not
# InlineBase64 are silently dropped here (no direct-base64 fallback) —
# confirm this asymmetry is intentional.
return [
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
for resolved in resolved_files.values()
if isinstance(resolved, InlineBase64)
]

View File

@@ -20,7 +20,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from mypy_boto3_bedrock_runtime.type_defs import (
GuardrailConfigurationTypeDef,
GuardrailStreamConfigurationTypeDef,
@@ -1563,260 +1562,3 @@ class BedrockCompletion(BaseLLM):
"video/3gpp": "three_gp",
}
return format_map.get(content_type)
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as Bedrock Converse API multimodal content blocks.
Bedrock Converse API supports both raw bytes and S3 URI references.
S3 uploads are only supported by Amazon Nova models.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for S3 uploads.
Returns:
List of content blocks in Bedrock's expected format.
"""
if not self.supports_multimodal():
return []
import os
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBytes,
)
content_blocks: list[dict[str, Any]] = []
is_nova = self._is_nova_model()
# S3 delivery is opt-in via env vars and gated to Nova models.
s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
prefer_upload = bool(s3_bucket) and is_nova
config = FileResolverConfig(
prefer_upload=prefer_upload, use_bytes_for_bedrock=True
)
resolver = FileResolver(config=config, upload_cache=upload_cache)
# NOTE(review): no supported-type pre-filter here (unlike other
# providers); unsupported types are dropped only when the video/document
# format lookup returns None — images pass through unconditionally.
for name, file_input in files.items():
content_type = file_input.content_type
resolved = resolver.resolve(file_input, "bedrock")
if isinstance(resolved, FileReference) and resolved.file_uri:
s3_location: dict[str, Any] = {"uri": resolved.file_uri}
if s3_bucket_owner:
s3_location["bucketOwner"] = s3_bucket_owner
if content_type.startswith("image/"):
# Bedrock expects the bare format name; normalize jpg -> jpeg.
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"s3Location": s3_location},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"s3Location": s3_location},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"s3Location": s3_location},
}
}
)
else:
# Inline delivery: prefer the resolver's bytes, else read directly.
if isinstance(resolved, InlineBytes):
file_bytes = resolved.data
else:
file_bytes = file_input.read()
if content_type.startswith("image/"):
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"bytes": file_bytes},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"bytes": file_bytes},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"bytes": file_bytes},
}
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as Bedrock Converse API multimodal content blocks.
Uses parallel file resolution. S3 uploads are only supported by Nova models.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for S3 uploads.
Returns:
List of content blocks in Bedrock's expected format.
"""
if not self.supports_multimodal():
return []
import os
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBytes,
)
is_nova = self._is_nova_model()
# S3 delivery is opt-in via env vars and gated to Nova models.
s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
prefer_upload = bool(s3_bucket) and is_nova
config = FileResolverConfig(
prefer_upload=prefer_upload, use_bytes_for_bedrock=True
)
resolver = FileResolver(config=config, upload_cache=upload_cache)
# Resolve all files concurrently, then format sequentially.
resolved_files = await resolver.aresolve_files(files, "bedrock")
content_blocks: list[dict[str, Any]] = []
for name, resolved in resolved_files.items():
file_input = files[name]
content_type = file_input.content_type
if isinstance(resolved, FileReference) and resolved.file_uri:
s3_location: dict[str, Any] = {"uri": resolved.file_uri}
if s3_bucket_owner:
s3_location["bucketOwner"] = s3_bucket_owner
if content_type.startswith("image/"):
# Bedrock expects the bare format name; normalize jpg -> jpeg.
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"s3Location": s3_location},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"s3Location": s3_location},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"s3Location": s3_location},
}
}
)
else:
# Inline delivery: prefer resolver bytes; else async read.
if isinstance(resolved, InlineBytes):
file_bytes = resolved.data
else:
file_bytes = await file_input.aread()
if content_type.startswith("image/"):
media_type = content_type.split("/")[-1]
if media_type == "jpg":
media_type = "jpeg"
content_blocks.append(
{
"image": {
"format": media_type,
"source": {"bytes": file_bytes},
}
}
)
elif content_type.startswith("video/"):
video_format = self._get_video_format(content_type)
if video_format:
content_blocks.append(
{
"video": {
"format": video_format,
"source": {"bytes": file_bytes},
}
}
)
else:
doc_format = self._get_document_format(content_type)
if doc_format:
content_blocks.append(
{
"document": {
"name": name,
"format": doc_format,
"source": {"bytes": file_bytes},
}
}
)
return content_blocks

View File

@@ -19,11 +19,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import (
FileInput,
UploadCache,
)
from crewai.llms.hooks.base import BaseInterceptor
@@ -1097,138 +1092,6 @@ class GeminiCompletion(BaseLLM):
"""
return ["image/", "audio/", "video/", "application/pdf", "text/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as Gemini multimodal content blocks.
Gemini supports both inlineData format and file references via File API.
Uses FileResolver to determine the best delivery method based on file size.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in Gemini's expected format.
"""
# NOTE(review): unlike other providers, there is no
# supports_multimodal() guard here — confirm whether Gemini models are
# always multimodal or the guard was omitted by accident.
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
# Skip files whose MIME type matches none of the supported prefixes.
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "gemini")
if isinstance(resolved, FileReference) and resolved.file_uri:
# Use file reference format for uploaded files
content_blocks.append(
{
"fileData": {
"mimeType": resolved.content_type,
"fileUri": resolved.file_uri,
}
}
)
elif isinstance(resolved, InlineBase64):
# Use inline format for smaller files
content_blocks.append(
{
"inlineData": {
"mimeType": resolved.content_type,
"data": resolved.data,
}
}
)
else:
# Fallback to base64 encoding
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"inlineData": {
"mimeType": content_type,
"data": data,
}
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as Gemini multimodal content blocks.
Uses parallel file resolution for improved performance with multiple files.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in Gemini's expected format.
"""
# Local import keeps crewai_files optional until multimodal is used.
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
supported_types = self.supported_multimodal_content_types()
# Pre-filter to supported MIME prefixes before resolving in parallel.
supported_files = {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
if not supported_files:
return []
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, "gemini")
content_blocks: list[dict[str, Any]] = []
# NOTE(review): results that are neither FileReference-with-uri nor
# InlineBase64 are silently dropped; the sync version falls back to
# direct base64 encoding instead — confirm the asymmetry is intentional.
for resolved in resolved_files.values():
if isinstance(resolved, FileReference) and resolved.file_uri:
content_blocks.append(
{
"fileData": {
"mimeType": resolved.content_type,
"fileUri": resolved.file_uri,
}
}
)
elif isinstance(resolved, InlineBase64):
content_blocks.append(
{
"inlineData": {
"mimeType": resolved.content_type,
"data": resolved.data,
}
}
)
return content_blocks
def format_text_content(self, text: str) -> dict[str, Any]:
"""Format text as a Gemini content block.

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
from collections.abc import AsyncIterator
import json
import logging
@@ -27,8 +26,6 @@ from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai_files import FileInput, UploadCache
from crewai.agent.core import Agent
from crewai.llms.hooks.base import BaseInterceptor
from crewai.task import Task
@@ -1080,136 +1077,3 @@ class OpenAICompletion(BaseLLM):
if not self.supports_multimodal():
return []
return ["image/"]
def format_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Format files as OpenAI multimodal content blocks.
OpenAI supports both base64 data URLs and file_id references via Files API.
Uses FileResolver to determine the best delivery method based on file size.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in OpenAI's expected format.
"""
if not self.supports_multimodal():
return []
# Local import keeps crewai_files optional until multimodal is used.
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
content_blocks: list[dict[str, Any]] = []
supported_types = self.supported_multimodal_content_types()
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
for file_input in files.values():
content_type = file_input.content_type
# Skip files whose MIME type matches none of the supported prefixes.
if not any(content_type.startswith(t) for t in supported_types):
continue
resolved = resolver.resolve(file_input, "openai")
if isinstance(resolved, FileReference):
# Uploaded files are referenced by file_id instead of inlined.
content_blocks.append(
{
"type": "file",
"file": {
"file_id": resolved.file_id,
},
}
)
elif isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
else:
# Fallback: encode raw bytes directly as a data URL.
data = base64.b64encode(file_input.read()).decode("ascii")
content_blocks.append(
{
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
)
return content_blocks
async def aformat_multimodal_content(
self,
files: dict[str, FileInput],
upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
"""Async format files as OpenAI multimodal content blocks.
Uses parallel file resolution for improved performance with multiple files.
Args:
files: Dictionary mapping file names to FileInput objects.
upload_cache: Optional cache for tracking uploaded files.
Returns:
List of content blocks in OpenAI's expected format.
"""
if not self.supports_multimodal():
return []
# Local import keeps crewai_files optional until multimodal is used.
from crewai_files import (
FileReference,
FileResolver,
FileResolverConfig,
InlineBase64,
)
supported_types = self.supported_multimodal_content_types()
# Pre-filter to supported MIME prefixes before resolving in parallel.
supported_files = {
name: f
for name, f in files.items()
if any(f.content_type.startswith(t) for t in supported_types)
}
if not supported_files:
return []
config = FileResolverConfig(prefer_upload=False)
resolver = FileResolver(config=config, upload_cache=upload_cache)
resolved_files = await resolver.aresolve_files(supported_files, "openai")
content_blocks: list[dict[str, Any]] = []
# NOTE(review): unlike the sync version there is no raw-bytes fallback;
# unrecognized resolver results are dropped — confirm intentional.
for resolved in resolved_files.values():
if isinstance(resolved, FileReference):
content_blocks.append(
{
"type": "file",
"file": {
"file_id": resolved.file_id,
},
}
)
elif isinstance(resolved, InlineBase64):
content_blocks.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{resolved.content_type};base64,{resolved.data}"
},
}
)
return content_blocks