feat(files): add prefer_upload parameter to format_multimodal_content

Allow callers to force file uploads through the high-level API, rather than only triggering an upload when a file exceeds the size threshold. This is useful for testing and for cases where file_id references are preferred over inline base64 data.
2026-05-02 07:42:40 +00:00 · 2026-01-23 02:19:12 -05:00
parent dc4bbfb5b9
commit ceb2bdc7fb
4 changed files with 419 additions and 5 deletions
--- a/lib/crewai/tests/cassettes/llms/TestOpenAIResponsesFileUploadIntegration.test_describe_image_via_format_api.yaml
+++ b/lib/crewai/tests/cassettes/llms/TestOpenAIResponsesFileUploadIntegration.test_describe_image_via_format_api.yaml
--- a/lib/crewai/tests/cassettes/llms/TestOpenAIResponsesFileUploadIntegration.test_describe_image_via_format_api_with_upload.yaml
+++ b/lib/crewai/tests/cassettes/llms/TestOpenAIResponsesFileUploadIntegration.test_describe_image_via_format_api_with_upload.yaml
--- a/lib/crewai/tests/llms/test_multimodal_integration.py
+++ b/lib/crewai/tests/llms/test_multimodal_integration.py
@@ -707,6 +707,76 @@ class TestOpenAIResponsesFileUploadIntegration:

        response = llm.call(messages)

+        assert response
+        assert isinstance(response, str)
+        assert len(response) > 0
+
+    @pytest.mark.vcr()
+    def test_describe_image_via_format_api(self, test_image_bytes: bytes) -> None:
+        """Test format_multimodal_content emits an inline Responses API image block."""
+        llm = LLM(model="openai/gpt-4o-mini", api="responses")
+        files = {"image": ImageFile(source=test_image_bytes)}
+
+        content_blocks = format_multimodal_content(files, "openai", api="responses")
+
+        # Verify the single content block uses the Responses API 'input_image' type
+        assert len(content_blocks) == 1
+        block = content_blocks[0]
+        assert block.get("type") == "input_image", (
+            f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
+        )
+        # prefer_upload is not passed, so expect an inline image_url (base64 data URL)
+        assert "image_url" in block, "Expected image_url in block for inline image"
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "Describe this image in one sentence."},
+                    *content_blocks,
+                ],
+            }
+        ]
+
+        response = llm.call(messages)
+
+        assert response
+        assert isinstance(response, str)
+        assert len(response) > 0
+
+    @pytest.mark.vcr()
+    def test_describe_image_via_format_api_with_upload(self, test_image_bytes: bytes) -> None:
+        """Test prefer_upload=True makes format_multimodal_content emit a file_id."""
+        llm = LLM(model="openai/gpt-4o-mini", api="responses")
+        files = {"image": ImageFile(source=test_image_bytes)}
+
+        content_blocks = format_multimodal_content(
+            files, "openai", api="responses", prefer_upload=True
+        )
+
+        # Verify the single content block is an input_image that carries a file_id
+        assert len(content_blocks) == 1
+        block = content_blocks[0]
+        assert block.get("type") == "input_image", (
+            f"Expected type 'input_image' for Responses API, got '{block.get('type')}'"
+        )
+        assert "file_id" in block, (
+            "Expected file_id in block when prefer_upload=True. "
+            f"Got keys: {list(block.keys())}"
+        )
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "Describe this image in one sentence."},
+                    *content_blocks,
+                ],
+            }
+        ]
+
+        response = llm.call(messages)
+
        assert response
        assert isinstance(response, str)
        assert len(response) > 0