feat(files): add prefer_upload parameter to format_multimodal_content

Allow callers to force file uploads via the high-level API instead of
only triggering uploads based on file size thresholds. Useful for
testing and when file_id references are preferred over inline base64.
This commit is contained in:
Greyson LaLonde
2026-01-23 02:19:12 -05:00
parent dc4bbfb5b9
commit ceb2bdc7fb
4 changed files with 419 additions and 5 deletions

View File

@@ -58,6 +58,7 @@ def format_multimodal_content(
files: dict[str, FileInput],
provider: str | None = None,
api: str | None = None,
prefer_upload: bool | None = None,
) -> list[dict[str, Any]]:
"""Format files as provider-specific multimodal content blocks.
@@ -71,6 +72,8 @@ def format_multimodal_content(
files: Dictionary mapping file names to FileInput objects.
provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
api: API variant (e.g., "responses" for OpenAI Responses API).
prefer_upload: Whether to prefer uploading files instead of inlining.
If None, uses provider-specific defaults.
Returns:
List of content blocks in the provider's expected format.
@@ -81,6 +84,10 @@ def format_multimodal_content(
>>> blocks = format_multimodal_content(files, "openai")
>>> # For OpenAI Responses API:
>>> blocks = format_multimodal_content(files, "openai", api="responses")
>>> # With file upload:
>>> blocks = format_multimodal_content(
... files, "openai", api="responses", prefer_upload=True
... )
"""
if not files:
return []
@@ -100,7 +107,7 @@ def format_multimodal_content(
if not supported_files:
return []
config = _get_resolver_config(provider_type)
config = _get_resolver_config(provider_type, prefer_upload)
upload_cache = get_upload_cache()
resolver = FileResolver(config=config, upload_cache=upload_cache)
@@ -120,6 +127,7 @@ async def aformat_multimodal_content(
files: dict[str, FileInput],
provider: str | None = None,
api: str | None = None,
prefer_upload: bool | None = None,
) -> list[dict[str, Any]]:
"""Async format files as provider-specific multimodal content blocks.
@@ -129,6 +137,8 @@ async def aformat_multimodal_content(
files: Dictionary mapping file names to FileInput objects.
provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
api: API variant (e.g., "responses" for OpenAI Responses API).
prefer_upload: Whether to prefer uploading files instead of inlining.
If None, uses provider-specific defaults.
Returns:
List of content blocks in the provider's expected format.
@@ -151,7 +161,7 @@ async def aformat_multimodal_content(
if not supported_files:
return []
config = _get_resolver_config(provider_type)
config = _get_resolver_config(provider_type, prefer_upload)
upload_cache = get_upload_cache()
resolver = FileResolver(config=config, upload_cache=upload_cache)
@@ -220,23 +230,35 @@ def _filter_supported_files(
}
def _get_resolver_config(
    provider_lower: str,
    prefer_upload_override: bool | None = None,
) -> FileResolverConfig:
    """Get resolver config for provider.

    Args:
        provider_lower: Lowercase provider name.
        prefer_upload_override: Override for the prefer_upload setting.
            If None, uses provider-specific defaults (Bedrock uploads only
            when an S3 bucket is configured; all other providers inline).

    Returns:
        Configured FileResolverConfig.
    """
    if "bedrock" in provider_lower:
        # Bedrock defaults to uploading only when an S3 staging bucket is
        # configured; an explicit override always wins.
        s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
        prefer_upload = (
            prefer_upload_override
            if prefer_upload_override is not None
            else bool(s3_bucket)
        )
        return FileResolverConfig(
            prefer_upload=prefer_upload, use_bytes_for_bedrock=True
        )
    # Non-Bedrock default is False, so None and False collapse together.
    return FileResolverConfig(prefer_upload=bool(prefer_upload_override))
def _get_formatter(

View File

@@ -707,6 +707,76 @@ class TestOpenAIResponsesFileUploadIntegration:
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
@pytest.mark.vcr()
def test_describe_image_via_format_api(self, test_image_bytes: bytes) -> None:
    """Test format_multimodal_content with api='responses' parameter."""
    llm = LLM(model="openai/gpt-4o-mini", api="responses")
    files = {"image": ImageFile(source=test_image_bytes)}

    content_blocks = format_multimodal_content(files, "openai", api="responses")

    # Exactly one block, shaped for the Responses API.
    assert len(content_blocks) == 1
    block = content_blocks[0]
    assert block.get("type") == "input_image", (
        f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
    )
    # Without forcing an upload, the image should be inlined as a data URL.
    assert "image_url" in block, "Expected image_url in block for inline image"

    prompt = {"type": "input_text", "text": "Describe this image in one sentence."}
    messages = [{"role": "user", "content": [prompt, *content_blocks]}]

    response = llm.call(messages)
    assert response
    assert isinstance(response, str)
    assert len(response) > 0
@pytest.mark.vcr()
def test_describe_image_via_format_api_with_upload(self, test_image_bytes: bytes) -> None:
    """Test format_multimodal_content with prefer_upload=True uploads the file."""
    llm = LLM(model="openai/gpt-4o-mini", api="responses")
    files = {"image": ImageFile(source=test_image_bytes)}

    content_blocks = format_multimodal_content(
        files, "openai", api="responses", prefer_upload=True
    )

    # Exactly one block, shaped for the Responses API.
    assert len(content_blocks) == 1
    block = content_blocks[0]
    assert block.get("type") == "input_image", (
        f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
    )
    # Forcing an upload must yield a file_id reference, not inline data.
    assert "file_id" in block, (
        "Expected file_id in block when prefer_upload=True. "
        f"Got keys: {list(block.keys())}"
    )

    prompt = {"type": "input_text", "text": "Describe this image in one sentence."}
    messages = [{"role": "user", "content": [prompt, *content_blocks]}]

    response = llm.call(messages)
    assert response
    assert isinstance(response, str)
    assert len(response) > 0