Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-24 23:58:15 +00:00)
feat(files): add prefer_upload parameter to format_multimodal_content
Allow callers to force file uploads via the high-level API instead of only triggering uploads based on file size thresholds. Useful for testing and when file_id references are preferred over inline base64.
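For orientation, a minimal usage sketch of the new parameter follows. The import paths and file name are assumptions for illustration; this extract does not show which modules export format_multimodal_content and ImageFile.

    # Sketch only: import locations and "chart.png" are assumed, not confirmed by this diff.
    from crewai.files import ImageFile, format_multimodal_content  # hypothetical paths

    with open("chart.png", "rb") as f:
        files = {"chart": ImageFile(source=f.read())}

    # Default behaviour: provider-specific (inline base64 unless size thresholds
    # trigger an upload).
    inline_blocks = format_multimodal_content(files, "openai", api="responses")

    # New: force an upload so the content block carries a file_id instead of an
    # inline data URL.
    uploaded_blocks = format_multimodal_content(
        files, "openai", api="responses", prefer_upload=True
    )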
@@ -58,6 +58,7 @@ def format_multimodal_content(
     files: dict[str, FileInput],
     provider: str | None = None,
     api: str | None = None,
+    prefer_upload: bool | None = None,
 ) -> list[dict[str, Any]]:
     """Format files as provider-specific multimodal content blocks.

@@ -71,6 +72,8 @@ def format_multimodal_content(
         files: Dictionary mapping file names to FileInput objects.
         provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
         api: API variant (e.g., "responses" for OpenAI Responses API).
+        prefer_upload: Whether to prefer uploading files instead of inlining.
+            If None, uses provider-specific defaults.

     Returns:
         List of content blocks in the provider's expected format.
@@ -81,6 +84,10 @@ def format_multimodal_content(
         >>> blocks = format_multimodal_content(files, "openai")
         >>> # For OpenAI Responses API:
         >>> blocks = format_multimodal_content(files, "openai", api="responses")
+        >>> # With file upload:
+        >>> blocks = format_multimodal_content(
+        ...     files, "openai", api="responses", prefer_upload=True
+        ... )
     """
     if not files:
         return []
@@ -100,7 +107,7 @@ def format_multimodal_content(
     if not supported_files:
         return []

-    config = _get_resolver_config(provider_type)
+    config = _get_resolver_config(provider_type, prefer_upload)
     upload_cache = get_upload_cache()
     resolver = FileResolver(config=config, upload_cache=upload_cache)

@@ -120,6 +127,7 @@ async def aformat_multimodal_content(
     files: dict[str, FileInput],
     provider: str | None = None,
     api: str | None = None,
+    prefer_upload: bool | None = None,
 ) -> list[dict[str, Any]]:
     """Async format files as provider-specific multimodal content blocks.

@@ -129,6 +137,8 @@ async def aformat_multimodal_content(
         files: Dictionary mapping file names to FileInput objects.
         provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini").
         api: API variant (e.g., "responses" for OpenAI Responses API).
+        prefer_upload: Whether to prefer uploading files instead of inlining.
+            If None, uses provider-specific defaults.

     Returns:
         List of content blocks in the provider's expected format.
@@ -151,7 +161,7 @@ async def aformat_multimodal_content(
     if not supported_files:
         return []

-    config = _get_resolver_config(provider_type)
+    config = _get_resolver_config(provider_type, prefer_upload)
     upload_cache = get_upload_cache()
     resolver = FileResolver(config=config, upload_cache=upload_cache)

@@ -220,23 +230,35 @@ def _filter_supported_files(
     }


-def _get_resolver_config(provider_lower: str) -> FileResolverConfig:
+def _get_resolver_config(
+    provider_lower: str,
+    prefer_upload_override: bool | None = None,
+) -> FileResolverConfig:
     """Get resolver config for provider.

     Args:
         provider_lower: Lowercase provider name.
+        prefer_upload_override: Override for prefer_upload setting.
+            If None, uses provider-specific defaults.

     Returns:
         Configured FileResolverConfig.
     """
     if "bedrock" in provider_lower:
         s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
-        prefer_upload = bool(s3_bucket)
+        prefer_upload = (
+            prefer_upload_override
+            if prefer_upload_override is not None
+            else bool(s3_bucket)
+        )
         return FileResolverConfig(
             prefer_upload=prefer_upload, use_bytes_for_bedrock=True
         )

-    return FileResolverConfig(prefer_upload=False)
+    prefer_upload = (
+        prefer_upload_override if prefer_upload_override is not None else False
+    )
+    return FileResolverConfig(prefer_upload=prefer_upload)


 def _get_formatter(
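Distilled from the hunk above, the precedence is: an explicit prefer_upload_override always wins; None falls back to the provider default (Bedrock uploads only when CREWAI_BEDROCK_S3_BUCKET is set, everything else inlines). A self-contained sketch of just that decision, not the actual helper:

    import os

    def resolve_prefer_upload(provider_lower: str, override: bool | None) -> bool:
        """Sketch of the default-vs-override resolution shown in the diff."""
        # An explicit caller choice always takes precedence.
        if override is not None:
            return override
        # Bedrock defaults to uploading only when an S3 bucket is configured.
        if "bedrock" in provider_lower:
            return bool(os.environ.get("CREWAI_BEDROCK_S3_BUCKET"))
        # All other providers default to inlining.
        return False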
File diff suppressed because one or more lines are too long
@@ -707,6 +707,76 @@ class TestOpenAIResponsesFileUploadIntegration:

         response = llm.call(messages)

+        assert response
+        assert isinstance(response, str)
+        assert len(response) > 0
+
+    @pytest.mark.vcr()
+    def test_describe_image_via_format_api(self, test_image_bytes: bytes) -> None:
+        """Test format_multimodal_content with api='responses' parameter."""
+        llm = LLM(model="openai/gpt-4o-mini", api="responses")
+        files = {"image": ImageFile(source=test_image_bytes)}
+
+        content_blocks = format_multimodal_content(files, "openai", api="responses")
+
+        # Verify content blocks use Responses API format
+        assert len(content_blocks) == 1
+        block = content_blocks[0]
+        assert block.get("type") == "input_image", (
+            f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
+        )
+        # Should have image_url (base64 data URL) since we're not forcing upload
+        assert "image_url" in block, "Expected image_url in block for inline image"
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "Describe this image in one sentence."},
+                    *content_blocks,
+                ],
+            }
+        ]
+
+        response = llm.call(messages)
+
+        assert response
+        assert isinstance(response, str)
+        assert len(response) > 0
+
+    @pytest.mark.vcr()
+    def test_describe_image_via_format_api_with_upload(self, test_image_bytes: bytes) -> None:
+        """Test format_multimodal_content with prefer_upload=True uploads the file."""
+        llm = LLM(model="openai/gpt-4o-mini", api="responses")
+        files = {"image": ImageFile(source=test_image_bytes)}
+
+        content_blocks = format_multimodal_content(
+            files, "openai", api="responses", prefer_upload=True
+        )
+
+        # Verify content blocks use file_id from upload
+        assert len(content_blocks) == 1
+        block = content_blocks[0]
+        assert block.get("type") == "input_image", (
+            f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
+        )
+        assert "file_id" in block, (
+            "Expected file_id in block when prefer_upload=True. "
+            f"Got keys: {list(block.keys())}"
+        )
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "Describe this image in one sentence."},
+                    *content_blocks,
+                ],
+            }
+        ]
+
+        response = llm.call(messages)
+
         assert response
         assert isinstance(response, str)
         assert len(response) > 0