feat(files): add prefer_upload parameter to format_multimodal_content

Allow callers to force file uploads via the high-level API instead of
only triggering uploads based on file size thresholds. Useful for
testing and when file_id references are preferred over inline base64.
This commit is contained in:
Greyson LaLonde
2026-01-23 02:19:12 -05:00
parent dc4bbfb5b9
commit ceb2bdc7fb
4 changed files with 419 additions and 5 deletions

View File

@@ -58,6 +58,7 @@ def format_multimodal_content(
files: dict[str, FileInput], files: dict[str, FileInput],
provider: str | None = None, provider: str | None = None,
api: str | None = None, api: str | None = None,
prefer_upload: bool | None = None,
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
"""Format files as provider-specific multimodal content blocks. """Format files as provider-specific multimodal content blocks.
@@ -71,6 +72,8 @@ def format_multimodal_content(
files: Dictionary mapping file names to FileInput objects. files: Dictionary mapping file names to FileInput objects.
provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini"). provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
api: API variant (e.g., "responses" for OpenAI Responses API). api: API variant (e.g., "responses" for OpenAI Responses API).
prefer_upload: Whether to prefer uploading files instead of inlining.
If None, uses provider-specific defaults.
Returns: Returns:
List of content blocks in the provider's expected format. List of content blocks in the provider's expected format.
@@ -81,6 +84,10 @@ def format_multimodal_content(
>>> blocks = format_multimodal_content(files, "openai") >>> blocks = format_multimodal_content(files, "openai")
>>> # For OpenAI Responses API: >>> # For OpenAI Responses API:
>>> blocks = format_multimodal_content(files, "openai", api="responses") >>> blocks = format_multimodal_content(files, "openai", api="responses")
>>> # With file upload:
>>> blocks = format_multimodal_content(
... files, "openai", api="responses", prefer_upload=True
... )
""" """
if not files: if not files:
return [] return []
@@ -100,7 +107,7 @@ def format_multimodal_content(
if not supported_files: if not supported_files:
return [] return []
config = _get_resolver_config(provider_type) config = _get_resolver_config(provider_type, prefer_upload)
upload_cache = get_upload_cache() upload_cache = get_upload_cache()
resolver = FileResolver(config=config, upload_cache=upload_cache) resolver = FileResolver(config=config, upload_cache=upload_cache)
@@ -120,6 +127,7 @@ async def aformat_multimodal_content(
files: dict[str, FileInput], files: dict[str, FileInput],
provider: str | None = None, provider: str | None = None,
api: str | None = None, api: str | None = None,
prefer_upload: bool | None = None,
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
"""Async format files as provider-specific multimodal content blocks. """Async format files as provider-specific multimodal content blocks.
@@ -129,6 +137,8 @@ async def aformat_multimodal_content(
files: Dictionary mapping file names to FileInput objects. files: Dictionary mapping file names to FileInput objects.
provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini"). provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
api: API variant (e.g., "responses" for OpenAI Responses API). api: API variant (e.g., "responses" for OpenAI Responses API).
prefer_upload: Whether to prefer uploading files instead of inlining.
If None, uses provider-specific defaults.
Returns: Returns:
List of content blocks in the provider's expected format. List of content blocks in the provider's expected format.
@@ -151,7 +161,7 @@ async def aformat_multimodal_content(
if not supported_files: if not supported_files:
return [] return []
config = _get_resolver_config(provider_type) config = _get_resolver_config(provider_type, prefer_upload)
upload_cache = get_upload_cache() upload_cache = get_upload_cache()
resolver = FileResolver(config=config, upload_cache=upload_cache) resolver = FileResolver(config=config, upload_cache=upload_cache)
@@ -220,23 +230,35 @@ def _filter_supported_files(
} }
def _get_resolver_config(
    provider_lower: str,
    prefer_upload_override: bool | None = None,
) -> FileResolverConfig:
    """Build the file resolver configuration for a provider.

    Args:
        provider_lower: Lowercase provider name.
        prefer_upload_override: Explicit value for prefer_upload. When None,
            the provider-specific default is used instead.

    Returns:
        Configured FileResolverConfig.
    """
    is_bedrock = "bedrock" in provider_lower

    # An explicit override always wins; otherwise fall back to the provider
    # default (Bedrock: upload only when an S3 bucket is configured in the
    # environment; every other provider: no upload).
    if prefer_upload_override is not None:
        prefer_upload = prefer_upload_override
    elif is_bedrock:
        prefer_upload = bool(os.environ.get("CREWAI_BEDROCK_S3_BUCKET"))
    else:
        prefer_upload = False

    if is_bedrock:
        return FileResolverConfig(
            prefer_upload=prefer_upload, use_bytes_for_bedrock=True
        )
    return FileResolverConfig(prefer_upload=prefer_upload)
def _get_formatter( def _get_formatter(

View File

@@ -707,6 +707,76 @@ class TestOpenAIResponsesFileUploadIntegration:
response = llm.call(messages) response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
@pytest.mark.vcr()
def test_describe_image_via_format_api(self, test_image_bytes: bytes) -> None:
    """Check format_multimodal_content output for api='responses' without upload.

    Formats a single image for the OpenAI Responses API, verifies the
    resulting block shape, then sends it to the model with a text prompt
    and checks that a non-empty string comes back.
    """
    llm = LLM(model="openai/gpt-4o-mini", api="responses")

    files = {"image": ImageFile(source=test_image_bytes)}
    content_blocks = format_multimodal_content(files, "openai", api="responses")

    # Exactly one block is expected, in Responses API image format.
    assert len(content_blocks) == 1
    block = content_blocks[0]
    assert block.get("type") == "input_image", (
        f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
    )

    # No upload was requested, so the image should be carried inline
    # (base64 data URL) under image_url.
    assert "image_url" in block, "Expected image_url in block for inline image"

    prompt = {"type": "input_text", "text": "Describe this image in one sentence."}
    user_message = {"role": "user", "content": [prompt, *content_blocks]}

    response = llm.call([user_message])

    assert response
    assert isinstance(response, str)
    assert len(response) > 0
@pytest.mark.vcr()
def test_describe_image_via_format_api_with_upload(self, test_image_bytes: bytes) -> None:
    """Check that prefer_upload=True yields a file_id-based image block.

    Formats a single image for the OpenAI Responses API with uploading
    forced, verifies the block references a file_id, then sends it to the
    model with a text prompt and checks that a non-empty string comes back.
    """
    llm = LLM(model="openai/gpt-4o-mini", api="responses")

    files = {"image": ImageFile(source=test_image_bytes)}
    content_blocks = format_multimodal_content(
        files, "openai", api="responses", prefer_upload=True
    )

    # Exactly one block is expected, in Responses API image format.
    assert len(content_blocks) == 1
    block = content_blocks[0]
    assert block.get("type") == "input_image", (
        f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
    )

    # With upload forced, the block must reference the file by id.
    assert "file_id" in block, (
        "Expected file_id in block when prefer_upload=True. "
        f"Got keys: {list(block.keys())}"
    )

    prompt = {"type": "input_text", "text": "Describe this image in one sentence."}
    user_message = {"role": "user", "content": [prompt, *content_blocks]}

    response = llm.call([user_message])

    assert response
    assert isinstance(response, str)
    assert len(response) > 0