feat(files): add file_id upload support and text file handling

- Add VCR patch for binary request bodies (base64 encoding fallback)
- Add generate_filename() utility for UUID-based filenames with extension
- Add OpenAIResponsesFormatter for Responses API (input_image, input_file)
- Fix OpenAI uploader to use 'vision' purpose for images
- Fix Anthropic uploader to use tuple format (filename, content, content_type)
- Add TextConstraints and text support for Gemini
- Add file_id upload integration tests for Anthropic and OpenAI Responses API
Greyson LaLonde
2026-01-23 01:57:29 -05:00
parent 7c9ce9ccd8
commit 4ab53c0726
14 changed files with 833 additions and 44 deletions

View File

@@ -1,5 +1,6 @@
"""Pytest configuration for crewAI workspace."""
import base64
from collections.abc import Generator
import gzip
import os
@@ -10,6 +11,7 @@ from typing import Any
from dotenv import load_dotenv
import pytest
from vcr.request import Request # type: ignore[import-untyped]
import vcr.stubs.httpx_stubs as httpx_stubs # type: ignore[import-untyped]
env_test_path = Path(__file__).parent / ".env.test"
@@ -17,6 +19,25 @@ load_dotenv(env_test_path, override=True)
load_dotenv(override=True)
def _patched_make_vcr_request(httpx_request: Any, **kwargs: Any) -> Any:
"""Patched version of VCR's _make_vcr_request that handles binary content.
The original implementation fails on binary request bodies (like file uploads)
because it assumes all content can be decoded as UTF-8.
"""
raw_body = httpx_request.read()
try:
body = raw_body.decode("utf-8")
except UnicodeDecodeError:
body = base64.b64encode(raw_body).decode("ascii")
uri = str(httpx_request.url)
headers = dict(httpx_request.headers)
return Request(httpx_request.method, uri, body, headers)
httpx_stubs._make_vcr_request = _patched_make_vcr_request
@pytest.fixture(autouse=True, scope="function")
def cleanup_event_handlers() -> Generator[None, Any, None]:
"""Clean up event bus handlers after each test to prevent test pollution."""

View File

@@ -54,7 +54,7 @@ class FileOperationMetrics:
}
if self.filename:
result["filename"] = self.filename
result["file_name"] = self.filename
if self.provider:
result["provider"] = self.provider
if self.size_bytes is not None:

View File

@@ -64,6 +64,21 @@ def _fallback_content_type(filename: str | None) -> str:
return "application/octet-stream"
def generate_filename(content_type: str) -> str:
"""Generate a UUID-based filename with extension from content type.
Args:
content_type: MIME type to derive extension from.
Returns:
Filename in format "{uuid}{ext}" where ext includes the dot.
"""
import uuid
ext = mimetypes.guess_extension(content_type) or ""
return f"{uuid.uuid4()}{ext}"
def detect_content_type(data: bytes, filename: str | None = None) -> str:
"""Detect MIME type from file content.

View File

@@ -4,9 +4,11 @@ from crewai_files.formatting.api import (
aformat_multimodal_content,
format_multimodal_content,
)
from crewai_files.formatting.openai import OpenAIResponsesFormatter
__all__ = [
"OpenAIResponsesFormatter",
"aformat_multimodal_content",
"format_multimodal_content",
]

View File

@@ -186,6 +186,11 @@ def _get_supported_types(
supported.append("audio/")
if constraints.video is not None:
supported.append("video/")
if constraints.text is not None:
supported.append("text/")
supported.append("application/json")
supported.append("application/xml")
supported.append("application/x-yaml")
return supported
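
How the expanded list is consumed is not shown in this hunk; a hypothetical membership check against its mix of prefix entries ("text/") and full types ("application/json") might look like:

supported = ["text/", "application/json", "application/xml", "application/x-yaml"]
content_type = "text/markdown"

is_supported = any(
    content_type == entry or (entry.endswith("/") and content_type.startswith(entry))
    for entry in supported
)
print(is_supported)  # True: "text/markdown" matches the "text/" prefix entry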

View File

@@ -14,6 +14,95 @@ from crewai_files.core.resolved import (
)
class OpenAIResponsesFormatter:
"""Formats resolved files into OpenAI Responses API content blocks.
The Responses API uses a different format than Chat Completions:
- Images use `type: "input_image"` with `file_id` or `image_url`
- PDFs use `type: "input_file"` with `file_id`, `file_url`, or `file_data`
"""
@staticmethod
def format_block(resolved: ResolvedFileType, content_type: str) -> dict[str, Any]:
"""Format a resolved file into an OpenAI Responses API content block.
Args:
resolved: Resolved file.
content_type: MIME type of the file.
Returns:
Content block dict.
Raises:
TypeError: If resolved type is not supported.
"""
is_image = content_type.startswith("image/")
is_pdf = content_type == "application/pdf"
if isinstance(resolved, FileReference):
if is_image:
return {
"type": "input_image",
"file_id": resolved.file_id,
}
if is_pdf:
return {
"type": "input_file",
"file_id": resolved.file_id,
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
if isinstance(resolved, UrlReference):
if is_image:
return {
"type": "input_image",
"image_url": resolved.url,
}
if is_pdf:
return {
"type": "input_file",
"file_url": resolved.url,
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
if isinstance(resolved, InlineBase64):
if is_image:
return {
"type": "input_image",
"image_url": f"data:{resolved.content_type};base64,{resolved.data}",
}
if is_pdf:
return {
"type": "input_file",
"file_data": f"data:{resolved.content_type};base64,{resolved.data}",
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
if isinstance(resolved, InlineBytes):
data = base64.b64encode(resolved.data).decode("ascii")
if is_image:
return {
"type": "input_image",
"image_url": f"data:{resolved.content_type};base64,{data}",
}
if is_pdf:
return {
"type": "input_file",
"file_data": f"data:{resolved.content_type};base64,{data}",
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
class OpenAIFormatter:
"""Formats resolved files into OpenAI content blocks."""

View File

@@ -7,6 +7,7 @@ from typing import Literal
from crewai_files.core.types import (
AudioMimeType,
ImageMimeType,
TextContentType,
VideoMimeType,
)
@@ -72,6 +73,27 @@ GEMINI_VIDEO_FORMATS: tuple[VideoMimeType, ...] = (
"video/x-flv",
)
DEFAULT_TEXT_FORMATS: tuple[TextContentType, ...] = (
"text/plain",
"text/markdown",
"text/csv",
"application/json",
"text/xml",
"text/html",
)
GEMINI_TEXT_FORMATS: tuple[TextContentType, ...] = (
"text/plain",
"text/markdown",
"text/csv",
"application/json",
"application/xml",
"text/xml",
"application/x-yaml",
"text/yaml",
"text/html",
)
@dataclass(frozen=True)
class ImageConstraints:
@@ -135,6 +157,19 @@ class VideoConstraints:
supported_formats: tuple[VideoMimeType, ...] = DEFAULT_VIDEO_FORMATS
@dataclass(frozen=True)
class TextConstraints:
"""Constraints for text files.
Attributes:
max_size_bytes: Maximum file size in bytes.
supported_formats: Supported text MIME types.
"""
max_size_bytes: int
supported_formats: tuple[TextContentType, ...] = DEFAULT_TEXT_FORMATS
@dataclass(frozen=True)
class ProviderConstraints:
"""Complete set of constraints for a provider.
@@ -145,6 +180,7 @@ class ProviderConstraints:
pdf: PDF file constraints.
audio: Audio file constraints.
video: Video file constraints.
text: Text file constraints.
general_max_size_bytes: Maximum size for any file type.
supports_file_upload: Whether the provider supports file upload APIs.
file_upload_threshold_bytes: Size threshold above which to use file upload.
@@ -156,6 +192,7 @@ class ProviderConstraints:
pdf: PDFConstraints | None = None
audio: AudioConstraints | None = None
video: VideoConstraints | None = None
text: TextConstraints | None = None
general_max_size_bytes: int | None = None
supports_file_upload: bool = False
file_upload_threshold_bytes: int | None = None
@@ -213,6 +250,10 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
max_duration_seconds=3600, # 1 hour at default resolution
supported_formats=GEMINI_VIDEO_FORMATS,
),
text=TextConstraints(
max_size_bytes=104_857_600,
supported_formats=GEMINI_TEXT_FORMATS,
),
supports_file_upload=True,
file_upload_threshold_bytes=20_971_520,
supports_url_references=True,
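
A hypothetical pre-upload check against the new text constraints (assumes data: bytes and content_type: str are in scope; the check itself is illustrative, not from this diff):

constraints = GEMINI_CONSTRAINTS  # defined above in this file

if constraints.text is not None:
    ok = (
        len(data) <= constraints.text.max_size_bytes  # 104_857_600 = 100 MiB
        and content_type in constraints.text.supported_formats
    )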

View File

@@ -2,11 +2,11 @@
from __future__ import annotations
import io
import logging
import os
from typing import Any
from crewai_files.core.sources import generate_filename
from crewai_files.core.types import FileInput
from crewai_files.processing.exceptions import classify_upload_error
from crewai_files.uploaders.base import FileUploader, UploadResult
@@ -91,17 +91,14 @@ class AnthropicFileUploader(FileUploader):
client = self._get_client()
content = file.read()
file_purpose = purpose or "user_upload"
file_data = io.BytesIO(content)
logger.info(
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
)
uploaded_file = client.files.create(
file=(file.filename, file_data, file.content_type),
purpose=file_purpose,
filename = file.filename or generate_filename(file.content_type)
uploaded_file = client.beta.files.upload(
file=(filename, content, file.content_type),
)
logger.info(f"Uploaded to Anthropic: {uploaded_file.id}")
@@ -129,7 +126,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_client()
client.files.delete(file_id=file_id)
client.beta.files.delete(file_id=file_id)
logger.info(f"Deleted Anthropic file: {file_id}")
return True
except Exception as e:
@@ -147,7 +144,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_client()
file_info = client.files.retrieve(file_id=file_id)
file_info = client.beta.files.retrieve(file_id=file_id)
return {
"id": file_info.id,
"filename": file_info.filename,
@@ -167,7 +164,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_client()
files = client.files.list()
files = client.beta.files.list()
return [
{
"id": f.id,
@@ -202,17 +199,14 @@ class AnthropicFileUploader(FileUploader):
client = self._get_async_client()
content = await file.aread()
file_purpose = purpose or "user_upload"
file_data = io.BytesIO(content)
logger.info(
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
)
uploaded_file = await client.files.create(
file=(file.filename, file_data, file.content_type),
purpose=file_purpose,
filename = file.filename or generate_filename(file.content_type)
uploaded_file = await client.beta.files.upload(
file=(filename, content, file.content_type),
)
logger.info(f"Uploaded to Anthropic: {uploaded_file.id}")
@@ -240,7 +234,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_async_client()
await client.files.delete(file_id=file_id)
await client.beta.files.delete(file_id=file_id)
logger.info(f"Deleted Anthropic file: {file_id}")
return True
except Exception as e:
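
In isolation, the fixed upload call now passes raw bytes in a (filename, content, content_type) tuple to the beta Files API, rather than wrapping them in io.BytesIO for client.files.create() with a purpose argument. A sketch, assuming client and pdf_bytes are in scope:

uploaded = client.beta.files.upload(
    file=("report.pdf", pdf_bytes, "application/pdf"),
)
print(uploaded.id)  # e.g. "file_011CNha8iCJcU1wXNR6q4V8w" (illustrative)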

View File

@@ -9,7 +9,7 @@ import os
from typing import Any
from crewai_files.core.constants import DEFAULT_UPLOAD_CHUNK_SIZE, FILES_API_MAX_SIZE
from crewai_files.core.sources import FileBytes, FilePath, FileStream
from crewai_files.core.sources import FileBytes, FilePath, FileStream, generate_filename
from crewai_files.core.types import FileInput
from crewai_files.processing.exceptions import (
PermanentUploadError,
@@ -22,6 +22,27 @@ from crewai_files.uploaders.base import FileUploader, UploadResult
logger = logging.getLogger(__name__)
def _get_purpose_for_content_type(content_type: str, purpose: str | None) -> str:
"""Get the appropriate purpose for a file based on content type.
OpenAI Files API requires different purposes for different file types:
- Images (for Responses API vision): "vision"
- PDFs and other documents: "user_data"
Args:
content_type: MIME type of the file.
purpose: Optional explicit purpose override.
Returns:
The purpose string to use for upload.
"""
if purpose is not None:
return purpose
if content_type.startswith("image/"):
return "vision"
return "user_data"
def _get_file_size(file: FileInput) -> int | None:
"""Get file size without reading content if possible.
@@ -219,13 +240,14 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_client()
file_purpose = purpose or "user_data"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
file_data = io.BytesIO(content)
file_data.name = file.filename or "file"
file_data.name = filename
logger.info(
f"Uploading file '{file.filename}' to OpenAI Files API ({len(content)} bytes)"
f"Uploading file '{filename}' to OpenAI Files API ({len(content)} bytes)"
)
uploaded_file = client.files.create(
@@ -254,8 +276,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
file_size = len(content)
logger.info(
@@ -329,8 +351,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
logger.info(
f"Uploading file '{filename}' to OpenAI Uploads API (streaming) "
@@ -496,10 +518,10 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_async_client()
file_purpose = purpose or "user_data"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
file_data = io.BytesIO(content)
file_data.name = file.filename or "file"
file_data.name = file.filename or generate_filename(file.content_type)
logger.info(
f"Uploading file '{file.filename}' to OpenAI Files API ({len(content)} bytes)"
@@ -531,8 +553,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_async_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
file_size = len(content)
logger.info(
@@ -606,8 +628,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_async_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
logger.info(
f"Uploading file '{filename}' to OpenAI Uploads API (streaming) "

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,8 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "Summarize what this text file says in
one sentence."}], "role": "user"}], "generationConfig": {}}'
one sentence."}, {"inlineData": {"data": "UmV2aWV3IEd1aWRlbGluZXMKCjEuIEJlIGNsZWFyIGFuZCBjb25jaXNlOiBXcml0ZSBmZWVkYmFjayB0aGF0IGlzIGVhc3kgdG8gdW5kZXJzdGFuZC4KMi4gRm9jdXMgb24gYmVoYXZpb3IgYW5kIG91dGNvbWVzOiBEZXNjcmliZSB3aGF0IGhhcHBlbmVkIGFuZCB3aHkgaXQgbWF0dGVycy4KMy4gQmUgc3BlY2lmaWM6IFByb3ZpZGUgZXhhbXBsZXMgdG8gc3VwcG9ydCB5b3VyIHBvaW50cy4KNC4gQmFsYW5jZSBwb3NpdGl2ZXMgYW5kIGltcHJvdmVtZW50czogSGlnaGxpZ2h0IHN0cmVuZ3RocyBhbmQgYXJlYXMgdG8gZ3Jvdy4KNS4gQmUgcmVzcGVjdGZ1bCBhbmQgY29uc3RydWN0aXZlOiBBc3N1bWUgcG9zaXRpdmUgaW50ZW50IGFuZCBvZmZlciBzb2x1dGlvbnMuCjYuIFVzZSBvYmplY3RpdmUgY3JpdGVyaWE6IFJlZmVyZW5jZSBnb2FscywgbWV0cmljcywgb3IgZXhwZWN0YXRpb25zIHdoZXJlIHBvc3NpYmxlLgo3LiBTdWdnZXN0IG5leHQgc3RlcHM6IFJlY29tbWVuZCBhY3Rpb25hYmxlIHdheXMgdG8gaW1wcm92ZS4KOC4gUHJvb2ZyZWFkOiBDaGVjayB0b25lLCBncmFtbWFyLCBhbmQgY2xhcml0eSBiZWZvcmUgc3VibWl0dGluZy4K",
"mimeType": "text/plain"}}], "role": "user"}], "generationConfig": {}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -12,7 +13,7 @@ interactions:
connection:
- keep-alive
content-length:
- '132'
- '976'
content-type:
- application/json
host:
@@ -26,27 +27,28 @@ interactions:
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"Please provide the text file so I
can summarize it for you. I need the content of the file to be able to understand
and summarize it in one sentence.\\n\"\n }\n ],\n \"role\":
[\n {\n \"text\": \"The text file outlines guidelines
for providing effective feedback, emphasizing clarity, specificity, a balance
of positive and constructive criticism, respect, objectivity, actionable suggestions,
and careful proofreading.\\n\"\n }\n ],\n \"role\":
\"model\"\n },\n \"finishReason\": \"STOP\",\n \"avgLogprobs\":
-0.17782547979643851\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
11,\n \"candidatesTokenCount\": 33,\n \"totalTokenCount\": 44,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 11\n
-0.17109338442484537\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
136,\n \"candidatesTokenCount\": 36,\n \"totalTokenCount\": 172,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 136\n
\ }\n ],\n \"candidatesTokensDetails\": [\n {\n \"modality\":
\"TEXT\",\n \"tokenCount\": 33\n }\n ]\n },\n \"modelVersion\":
\"gemini-2.0-flash\",\n \"responseId\": \"b-dyabKwN8a9jrEP7JT1yAo\"\n}\n"
\"TEXT\",\n \"tokenCount\": 36\n }\n ]\n },\n \"modelVersion\":
\"gemini-2.0-flash\",\n \"responseId\": \"wxZzaYaiGYG2_uMPtMjFiAw\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Fri, 23 Jan 2026 03:13:52 GMT
- Fri, 23 Jan 2026 06:35:48 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=631
- gfet4t7; dur=675
Transfer-Encoding:
- chunked
Vary:

View File

@@ -0,0 +1,67 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "Summarize what this text says in one
sentence."}, {"inlineData": {"data": "UmV2aWV3IEd1aWRlbGluZXMKCjEuIEJlIGNsZWFyIGFuZCBjb25jaXNlOiBXcml0ZSBmZWVkYmFjayB0aGF0IGlzIGVhc3kgdG8gdW5kZXJzdGFuZC4KMi4gRm9jdXMgb24gYmVoYXZpb3IgYW5kIG91dGNvbWVzOiBEZXNjcmliZSB3aGF0IGhhcHBlbmVkIGFuZCB3aHkgaXQgbWF0dGVycy4KMy4gQmUgc3BlY2lmaWM6IFByb3ZpZGUgZXhhbXBsZXMgdG8gc3VwcG9ydCB5b3VyIHBvaW50cy4KNC4gQmFsYW5jZSBwb3NpdGl2ZXMgYW5kIGltcHJvdmVtZW50czogSGlnaGxpZ2h0IHN0cmVuZ3RocyBhbmQgYXJlYXMgdG8gZ3Jvdy4KNS4gQmUgcmVzcGVjdGZ1bCBhbmQgY29uc3RydWN0aXZlOiBBc3N1bWUgcG9zaXRpdmUgaW50ZW50IGFuZCBvZmZlciBzb2x1dGlvbnMuCjYuIFVzZSBvYmplY3RpdmUgY3JpdGVyaWE6IFJlZmVyZW5jZSBnb2FscywgbWV0cmljcywgb3IgZXhwZWN0YXRpb25zIHdoZXJlIHBvc3NpYmxlLgo3LiBTdWdnZXN0IG5leHQgc3RlcHM6IFJlY29tbWVuZCBhY3Rpb25hYmxlIHdheXMgdG8gaW1wcm92ZS4KOC4gUHJvb2ZyZWFkOiBDaGVjayB0b25lLCBncmFtbWFyLCBhbmQgY2xhcml0eSBiZWZvcmUgc3VibWl0dGluZy4K",
"mimeType": "text/plain"}}], "role": "user"}], "generationConfig": {}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '971'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.12.10
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"Effective review feedback should be
clear, specific, balanced, respectful, and constructive, focusing on behaviors
and outcomes with examples, objective criteria, and suggested next steps,
ensuring it is proofread for clarity.\\n\"\n }\n ],\n \"role\":
\"model\"\n },\n \"finishReason\": \"STOP\",\n \"avgLogprobs\":
-0.35489303309743\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
135,\n \"candidatesTokenCount\": 41,\n \"totalTokenCount\": 176,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 135\n
\ }\n ],\n \"candidatesTokensDetails\": [\n {\n \"modality\":
\"TEXT\",\n \"tokenCount\": 41\n }\n ]\n },\n \"modelVersion\":
\"gemini-2.0-flash\",\n \"responseId\": \"xBZzaY2tCsa9jrEP7JT1yAo\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Fri, 23 Jan 2026 06:35:48 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=732
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1

View File

@@ -18,6 +18,7 @@ from crewai_files import (
VideoFile,
format_multimodal_content,
)
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
# Path to test data files
@@ -559,6 +560,153 @@ class TestGenericFileIntegration:
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
def _build_multimodal_message_with_upload(
llm: LLM, prompt: str, files: dict
) -> tuple[list[dict], list[dict]]:
"""Build a multimodal message using file_id uploads instead of inline base64.
Note: The OpenAI Chat Completions API only supports file_id for PDFs via
type="file", not for images. Image file_id support requires the Responses
API (type="input_image"). Since crewAI uses Chat Completions, we test
file_id uploads with Anthropic, which supports file_id for all types.
Returns:
Tuple of (messages, content_blocks) where content_blocks can be inspected
to verify file_id was used.
"""
from crewai_files.formatting.anthropic import AnthropicFormatter
config = FileResolverConfig(prefer_upload=True)
resolver = FileResolver(config=config)
formatter = AnthropicFormatter()
content_blocks = []
for file in files.values():
resolved = resolver.resolve(file, "anthropic")
block = formatter.format_block(file, resolved)
if block is not None:
content_blocks.append(block)
messages = [
{
"role": "user",
"content": [
llm.format_text_content(prompt),
*content_blocks,
],
}
]
return messages, content_blocks
def _build_responses_message_with_upload(
llm: LLM, prompt: str, files: dict
) -> tuple[list[dict], list[dict]]:
"""Build a Responses API message using file_id uploads.
The Responses API supports file_id for images via type="input_image".
Returns:
Tuple of (messages, content_blocks) where content_blocks can be inspected
to verify file_id was used.
"""
from crewai_files.formatting import OpenAIResponsesFormatter
config = FileResolverConfig(prefer_upload=True)
resolver = FileResolver(config=config)
content_blocks = []
for file in files.values():
resolved = resolver.resolve(file, "openai")
block = OpenAIResponsesFormatter.format_block(resolved, file.content_type)
content_blocks.append(block)
messages = [
{
"role": "user",
"content": [
{"type": "input_text", "text": prompt},
*content_blocks,
],
}
]
return messages, content_blocks
class TestAnthropicFileUploadIntegration:
"""Integration tests for Anthropic multimodal with file_id uploads.
We test file_id uploads with Anthropic because the OpenAI Chat Completions
API only supports file_id references for PDFs (type="file"), not images.
OpenAI's Responses API supports image file_id (type="input_image"), but
crewAI currently uses Chat Completions. Anthropic supports file_id for
all content types, including images.
"""
@pytest.mark.vcr()
def test_describe_image_with_file_id(self, test_image_bytes: bytes) -> None:
"""Test Anthropic can describe an image uploaded via Files API."""
llm = LLM(model="anthropic/claude-3-5-haiku-20241022")
files = {"image": ImageFile(source=test_image_bytes)}
messages, content_blocks = _build_multimodal_message_with_upload(
llm,
"Describe this image in one sentence. Be brief.",
files,
)
# Verify we're using file_id, not base64
assert len(content_blocks) == 1
source = content_blocks[0].get("source", {})
assert source.get("type") == "file", (
f"Expected source type 'file' for file_id upload, got '{source.get('type')}'. "
"This test verifies file_id uploads work - if falling back to base64, "
"check that the Anthropic Files API uploader is working correctly."
)
assert "file_id" in source, "Expected file_id in source for file_id upload"
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
class TestOpenAIResponsesFileUploadIntegration:
"""Integration tests for OpenAI Responses API with file_id uploads.
The Responses API supports file_id for images via type="input_image",
unlike Chat Completions which only supports file_id for PDFs.
"""
@pytest.mark.vcr()
def test_describe_image_with_file_id(self, test_image_bytes: bytes) -> None:
"""Test OpenAI Responses API can describe an image uploaded via Files API."""
llm = LLM(model="openai/gpt-4o-mini", api="responses")
files = {"image": ImageFile(source=test_image_bytes)}
messages, content_blocks = _build_responses_message_with_upload(
llm,
"Describe this image in one sentence. Be brief.",
files,
)
# Verify we're using file_id with input_image type
assert len(content_blocks) == 1
block = content_blocks[0]
assert block.get("type") == "input_image", (
f"Expected type 'input_image' for Responses API, got '{block.get('type')}'. "
"This test verifies file_id uploads work with the Responses API."
)
assert "file_id" in block, "Expected file_id in block for file_id upload"
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0