mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-23 07:08:14 +00:00
feat(files): add file_id upload support and text file handling
- Add VCR patch for binary request bodies (base64 encoding fallback)
- Add generate_filename() utility for UUID-based filenames with extension
- Add OpenAIResponsesFormatter for Responses API (input_image, input_file)
- Fix OpenAI uploader to use 'vision' purpose for images
- Fix Anthropic uploader to use tuple format (filename, content, content_type)
- Add TextConstraints and text support for Gemini
- Add file_id upload integration tests for Anthropic and OpenAI Responses API
This commit is contained in:
21
conftest.py
21
conftest.py
@@ -1,5 +1,6 @@
|
||||
"""Pytest configuration for crewAI workspace."""
|
||||
|
||||
import base64
|
||||
from collections.abc import Generator
|
||||
import gzip
|
||||
import os
|
||||
@@ -10,6 +11,7 @@ from typing import Any
|
||||
from dotenv import load_dotenv
|
||||
import pytest
|
||||
from vcr.request import Request # type: ignore[import-untyped]
|
||||
import vcr.stubs.httpx_stubs as httpx_stubs # type: ignore[import-untyped]
|
||||
|
||||
|
||||
env_test_path = Path(__file__).parent / ".env.test"
|
||||
@@ -17,6 +19,25 @@ load_dotenv(env_test_path, override=True)
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
def _patched_make_vcr_request(httpx_request: Any, **kwargs: Any) -> Any:
    """Build a VCR ``Request`` from an httpx request, tolerating binary bodies.

    The stock VCR helper fails on binary request bodies (such as file
    uploads) because it assumes the content decodes as UTF-8. This variant
    records the body as UTF-8 text when it decodes cleanly and as base64
    ASCII text otherwise.

    Args:
        httpx_request: Request object exposing ``read()``, ``url``,
            ``headers`` and ``method``.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        A VCR ``Request`` built from the method, URL, body and headers.
    """
    payload = httpx_request.read()
    try:
        text = payload.decode("utf-8")
    except UnicodeDecodeError:
        # Not valid UTF-8: keep the bytes recoverable as base64 text.
        text = base64.b64encode(payload).decode("ascii")

    return Request(
        httpx_request.method,
        str(httpx_request.url),
        text,
        dict(httpx_request.headers),
    )
|
||||
|
||||
|
||||
# Swap VCR's httpx request builder for the binary-tolerant version so that
# cassettes can be recorded for requests whose bodies are not valid UTF-8.
httpx_stubs._make_vcr_request = _patched_make_vcr_request
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="function")
|
||||
def cleanup_event_handlers() -> Generator[None, Any, None]:
|
||||
"""Clean up event bus handlers after each test to prevent test pollution."""
|
||||
|
||||
@@ -54,7 +54,7 @@ class FileOperationMetrics:
|
||||
}
|
||||
|
||||
if self.filename:
|
||||
result["filename"] = self.filename
|
||||
result["file_name"] = self.filename
|
||||
if self.provider:
|
||||
result["provider"] = self.provider
|
||||
if self.size_bytes is not None:
|
||||
|
||||
@@ -64,6 +64,21 @@ def _fallback_content_type(filename: str | None) -> str:
|
||||
return "application/octet-stream"
|
||||
|
||||
|
||||
def generate_filename(content_type: str) -> str:
    """Generate a UUID-based filename with an extension derived from a MIME type.

    Parameters attached to the content type (for example
    ``"; charset=utf-8"``) and surrounding whitespace are ignored when looking
    up the extension, because ``mimetypes.guess_extension`` only recognises
    bare ``type/subtype`` values.

    Args:
        content_type: MIME type to derive the extension from.

    Returns:
        Filename in the format "{uuid}{ext}" where ext includes the dot, or
        is empty when no extension is known for the MIME type.
    """
    import uuid

    # Drop any ";param=value" suffix so the lookup sees only "type/subtype".
    mime_type = content_type.partition(";")[0].strip()
    ext = mimetypes.guess_extension(mime_type) or ""
    return f"{uuid.uuid4()}{ext}"
|
||||
|
||||
|
||||
def detect_content_type(data: bytes, filename: str | None = None) -> str:
|
||||
"""Detect MIME type from file content.
|
||||
|
||||
|
||||
@@ -4,9 +4,11 @@ from crewai_files.formatting.api import (
|
||||
aformat_multimodal_content,
|
||||
format_multimodal_content,
|
||||
)
|
||||
from crewai_files.formatting.openai import OpenAIResponsesFormatter
|
||||
|
||||
|
||||
__all__ = [
|
||||
"OpenAIResponsesFormatter",
|
||||
"aformat_multimodal_content",
|
||||
"format_multimodal_content",
|
||||
]
|
||||
|
||||
@@ -186,6 +186,11 @@ def _get_supported_types(
|
||||
supported.append("audio/")
|
||||
if constraints.video is not None:
|
||||
supported.append("video/")
|
||||
if constraints.text is not None:
|
||||
supported.append("text/")
|
||||
supported.append("application/json")
|
||||
supported.append("application/xml")
|
||||
supported.append("application/x-yaml")
|
||||
return supported
|
||||
|
||||
|
||||
|
||||
@@ -14,6 +14,95 @@ from crewai_files.core.resolved import (
|
||||
)
|
||||
|
||||
|
||||
class OpenAIResponsesFormatter:
    """Builds OpenAI Responses API content blocks from resolved files.

    The Responses API uses a different shape than Chat Completions:
    - Images use `type: "input_image"` with `file_id` or `image_url`
    - PDFs use `type: "input_file"` with `file_id`, `file_url`, or `file_data`
    """

    @staticmethod
    def format_block(resolved: ResolvedFileType, content_type: str) -> dict[str, Any]:
        """Turn one resolved file into a Responses API content block.

        Args:
            resolved: Resolved file (file reference, URL reference, inline
                base64 string, or inline bytes).
            content_type: MIME type of the file.

        Returns:
            Content block dict with a ``type`` entry plus exactly one entry
            carrying the file reference or data.

        Raises:
            TypeError: If the resolved type is not one of the handled
                variants, or the content type is neither an image nor a PDF.
        """
        wants_image = content_type.startswith("image/")
        wants_pdf = content_type == "application/pdf"

        # Pick the key used for images, the key used for PDFs, and the value
        # both would carry, based on how the file was resolved.
        if isinstance(resolved, FileReference):
            image_key = file_key = "file_id"
            payload = resolved.file_id
        elif isinstance(resolved, UrlReference):
            image_key, file_key = "image_url", "file_url"
            payload = resolved.url
        elif isinstance(resolved, InlineBase64):
            image_key, file_key = "image_url", "file_data"
            payload = f"data:{resolved.content_type};base64,{resolved.data}"
        elif isinstance(resolved, InlineBytes):
            # Raw bytes must be base64-encoded before going into a data URL.
            encoded = base64.b64encode(resolved.data).decode("ascii")
            image_key, file_key = "image_url", "file_data"
            payload = f"data:{resolved.content_type};base64,{encoded}"
        else:
            raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")

        if wants_image:
            return {"type": "input_image", image_key: payload}
        if wants_pdf:
            return {"type": "input_file", file_key: payload}
        raise TypeError(f"Unsupported content type for Responses API: {content_type}")
|
||||
|
||||
|
||||
class OpenAIFormatter:
|
||||
"""Formats resolved files into OpenAI content blocks."""
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ from typing import Literal
|
||||
from crewai_files.core.types import (
|
||||
AudioMimeType,
|
||||
ImageMimeType,
|
||||
TextContentType,
|
||||
VideoMimeType,
|
||||
)
|
||||
|
||||
@@ -72,6 +73,27 @@ GEMINI_VIDEO_FORMATS: tuple[VideoMimeType, ...] = (
|
||||
"video/x-flv",
|
||||
)
|
||||
|
||||
# Text MIME types used when a TextConstraints instance does not specify its own list.
DEFAULT_TEXT_FORMATS: tuple[TextContentType, ...] = (
    "text/plain",
    "text/markdown",
    "text/csv",
    "application/json",
    "text/xml",
    "text/html",
)

# Text MIME types used by the Gemini provider constraints.
GEMINI_TEXT_FORMATS: tuple[TextContentType, ...] = (
    "text/plain",
    "text/markdown",
    "text/csv",
    "application/json",
    "application/xml",
    "text/xml",
    "application/x-yaml",
    "text/yaml",
    "text/html",
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ImageConstraints:
|
||||
@@ -135,6 +157,19 @@ class VideoConstraints:
|
||||
supported_formats: tuple[VideoMimeType, ...] = DEFAULT_VIDEO_FORMATS
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TextConstraints:
    """Size and format limits applied to text files.

    Attributes:
        max_size_bytes: Largest allowed file size, in bytes.
        supported_formats: Text MIME types that are accepted; defaults to
            ``DEFAULT_TEXT_FORMATS``.
    """

    max_size_bytes: int
    supported_formats: tuple[TextContentType, ...] = DEFAULT_TEXT_FORMATS
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProviderConstraints:
|
||||
"""Complete set of constraints for a provider.
|
||||
@@ -145,6 +180,7 @@ class ProviderConstraints:
|
||||
pdf: PDF file constraints.
|
||||
audio: Audio file constraints.
|
||||
video: Video file constraints.
|
||||
text: Text file constraints.
|
||||
general_max_size_bytes: Maximum size for any file type.
|
||||
supports_file_upload: Whether the provider supports file upload APIs.
|
||||
file_upload_threshold_bytes: Size threshold above which to use file upload.
|
||||
@@ -156,6 +192,7 @@ class ProviderConstraints:
|
||||
pdf: PDFConstraints | None = None
|
||||
audio: AudioConstraints | None = None
|
||||
video: VideoConstraints | None = None
|
||||
text: TextConstraints | None = None
|
||||
general_max_size_bytes: int | None = None
|
||||
supports_file_upload: bool = False
|
||||
file_upload_threshold_bytes: int | None = None
|
||||
@@ -213,6 +250,10 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
|
||||
max_duration_seconds=3600, # 1 hour at default resolution
|
||||
supported_formats=GEMINI_VIDEO_FORMATS,
|
||||
),
|
||||
text=TextConstraints(
|
||||
max_size_bytes=104_857_600,
|
||||
supported_formats=GEMINI_TEXT_FORMATS,
|
||||
),
|
||||
supports_file_upload=True,
|
||||
file_upload_threshold_bytes=20_971_520,
|
||||
supports_url_references=True,
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from crewai_files.core.sources import generate_filename
|
||||
from crewai_files.core.types import FileInput
|
||||
from crewai_files.processing.exceptions import classify_upload_error
|
||||
from crewai_files.uploaders.base import FileUploader, UploadResult
|
||||
@@ -91,17 +91,14 @@ class AnthropicFileUploader(FileUploader):
|
||||
client = self._get_client()
|
||||
|
||||
content = file.read()
|
||||
file_purpose = purpose or "user_upload"
|
||||
|
||||
file_data = io.BytesIO(content)
|
||||
|
||||
logger.info(
|
||||
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
|
||||
)
|
||||
|
||||
uploaded_file = client.files.create(
|
||||
file=(file.filename, file_data, file.content_type),
|
||||
purpose=file_purpose,
|
||||
filename = file.filename or generate_filename(file.content_type)
|
||||
uploaded_file = client.beta.files.upload(
|
||||
file=(filename, content, file.content_type),
|
||||
)
|
||||
|
||||
logger.info(f"Uploaded to Anthropic: {uploaded_file.id}")
|
||||
@@ -129,7 +126,7 @@ class AnthropicFileUploader(FileUploader):
|
||||
"""
|
||||
try:
|
||||
client = self._get_client()
|
||||
client.files.delete(file_id=file_id)
|
||||
client.beta.files.delete(file_id=file_id)
|
||||
logger.info(f"Deleted Anthropic file: {file_id}")
|
||||
return True
|
||||
except Exception as e:
|
||||
@@ -147,7 +144,7 @@ class AnthropicFileUploader(FileUploader):
|
||||
"""
|
||||
try:
|
||||
client = self._get_client()
|
||||
file_info = client.files.retrieve(file_id=file_id)
|
||||
file_info = client.beta.files.retrieve(file_id=file_id)
|
||||
return {
|
||||
"id": file_info.id,
|
||||
"filename": file_info.filename,
|
||||
@@ -167,7 +164,7 @@ class AnthropicFileUploader(FileUploader):
|
||||
"""
|
||||
try:
|
||||
client = self._get_client()
|
||||
files = client.files.list()
|
||||
files = client.beta.files.list()
|
||||
return [
|
||||
{
|
||||
"id": f.id,
|
||||
@@ -202,17 +199,14 @@ class AnthropicFileUploader(FileUploader):
|
||||
client = self._get_async_client()
|
||||
|
||||
content = await file.aread()
|
||||
file_purpose = purpose or "user_upload"
|
||||
|
||||
file_data = io.BytesIO(content)
|
||||
|
||||
logger.info(
|
||||
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
|
||||
)
|
||||
|
||||
uploaded_file = await client.files.create(
|
||||
file=(file.filename, file_data, file.content_type),
|
||||
purpose=file_purpose,
|
||||
filename = file.filename or generate_filename(file.content_type)
|
||||
uploaded_file = await client.beta.files.upload(
|
||||
file=(filename, content, file.content_type),
|
||||
)
|
||||
|
||||
logger.info(f"Uploaded to Anthropic: {uploaded_file.id}")
|
||||
@@ -240,7 +234,7 @@ class AnthropicFileUploader(FileUploader):
|
||||
"""
|
||||
try:
|
||||
client = self._get_async_client()
|
||||
await client.files.delete(file_id=file_id)
|
||||
await client.beta.files.delete(file_id=file_id)
|
||||
logger.info(f"Deleted Anthropic file: {file_id}")
|
||||
return True
|
||||
except Exception as e:
|
||||
|
||||
@@ -9,7 +9,7 @@ import os
|
||||
from typing import Any
|
||||
|
||||
from crewai_files.core.constants import DEFAULT_UPLOAD_CHUNK_SIZE, FILES_API_MAX_SIZE
|
||||
from crewai_files.core.sources import FileBytes, FilePath, FileStream
|
||||
from crewai_files.core.sources import FileBytes, FilePath, FileStream, generate_filename
|
||||
from crewai_files.core.types import FileInput
|
||||
from crewai_files.processing.exceptions import (
|
||||
PermanentUploadError,
|
||||
@@ -22,6 +22,27 @@ from crewai_files.uploaders.base import FileUploader, UploadResult
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_purpose_for_content_type(content_type: str, purpose: str | None) -> str:
|
||||
"""Get the appropriate purpose for a file based on content type.
|
||||
|
||||
OpenAI Files API requires different purposes for different file types:
|
||||
- Images (for Responses API vision): "vision"
|
||||
- PDFs and other documents: "user_data"
|
||||
|
||||
Args:
|
||||
content_type: MIME type of the file.
|
||||
purpose: Optional explicit purpose override.
|
||||
|
||||
Returns:
|
||||
The purpose string to use for upload.
|
||||
"""
|
||||
if purpose is not None:
|
||||
return purpose
|
||||
if content_type.startswith("image/"):
|
||||
return "vision"
|
||||
return "user_data"
|
||||
|
||||
|
||||
def _get_file_size(file: FileInput) -> int | None:
|
||||
"""Get file size without reading content if possible.
|
||||
|
||||
@@ -219,13 +240,14 @@ class OpenAIFileUploader(FileUploader):
|
||||
UploadResult with the file ID and metadata.
|
||||
"""
|
||||
client = self._get_client()
|
||||
file_purpose = purpose or "user_data"
|
||||
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
|
||||
filename = file.filename or generate_filename(file.content_type)
|
||||
|
||||
file_data = io.BytesIO(content)
|
||||
file_data.name = file.filename or "file"
|
||||
file_data.name = filename
|
||||
|
||||
logger.info(
|
||||
f"Uploading file '{file.filename}' to OpenAI Files API ({len(content)} bytes)"
|
||||
f"Uploading file '{filename}' to OpenAI Files API ({len(content)} bytes)"
|
||||
)
|
||||
|
||||
uploaded_file = client.files.create(
|
||||
@@ -254,8 +276,8 @@ class OpenAIFileUploader(FileUploader):
|
||||
UploadResult with the file ID and metadata.
|
||||
"""
|
||||
client = self._get_client()
|
||||
file_purpose = purpose or "user_data"
|
||||
filename = file.filename or "file"
|
||||
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
|
||||
filename = file.filename or generate_filename(file.content_type)
|
||||
file_size = len(content)
|
||||
|
||||
logger.info(
|
||||
@@ -329,8 +351,8 @@ class OpenAIFileUploader(FileUploader):
|
||||
UploadResult with the file ID and metadata.
|
||||
"""
|
||||
client = self._get_client()
|
||||
file_purpose = purpose or "user_data"
|
||||
filename = file.filename or "file"
|
||||
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
|
||||
filename = file.filename or generate_filename(file.content_type)
|
||||
|
||||
logger.info(
|
||||
f"Uploading file '{filename}' to OpenAI Uploads API (streaming) "
|
||||
@@ -496,10 +518,10 @@ class OpenAIFileUploader(FileUploader):
|
||||
UploadResult with the file ID and metadata.
|
||||
"""
|
||||
client = self._get_async_client()
|
||||
file_purpose = purpose or "user_data"
|
||||
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
|
||||
|
||||
file_data = io.BytesIO(content)
|
||||
file_data.name = file.filename or "file"
|
||||
file_data.name = file.filename or generate_filename(file.content_type)
|
||||
|
||||
logger.info(
|
||||
f"Uploading file '{file.filename}' to OpenAI Files API ({len(content)} bytes)"
|
||||
@@ -531,8 +553,8 @@ class OpenAIFileUploader(FileUploader):
|
||||
UploadResult with the file ID and metadata.
|
||||
"""
|
||||
client = self._get_async_client()
|
||||
file_purpose = purpose or "user_data"
|
||||
filename = file.filename or "file"
|
||||
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
|
||||
filename = file.filename or generate_filename(file.content_type)
|
||||
file_size = len(content)
|
||||
|
||||
logger.info(
|
||||
@@ -606,8 +628,8 @@ class OpenAIFileUploader(FileUploader):
|
||||
UploadResult with the file ID and metadata.
|
||||
"""
|
||||
client = self._get_async_client()
|
||||
file_purpose = purpose or "user_data"
|
||||
filename = file.filename or "file"
|
||||
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
|
||||
filename = file.filename or generate_filename(file.content_type)
|
||||
|
||||
logger.info(
|
||||
f"Uploading file '{filename}' to OpenAI Uploads API (streaming) "
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,7 +1,8 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"contents": [{"parts": [{"text": "Summarize what this text file says in
|
||||
one sentence."}], "role": "user"}], "generationConfig": {}}'
|
||||
one sentence."}, {"inlineData": {"data": "UmV2aWV3IEd1aWRlbGluZXMKCjEuIEJlIGNsZWFyIGFuZCBjb25jaXNlOiBXcml0ZSBmZWVkYmFjayB0aGF0IGlzIGVhc3kgdG8gdW5kZXJzdGFuZC4KMi4gRm9jdXMgb24gYmVoYXZpb3IgYW5kIG91dGNvbWVzOiBEZXNjcmliZSB3aGF0IGhhcHBlbmVkIGFuZCB3aHkgaXQgbWF0dGVycy4KMy4gQmUgc3BlY2lmaWM6IFByb3ZpZGUgZXhhbXBsZXMgdG8gc3VwcG9ydCB5b3VyIHBvaW50cy4KNC4gQmFsYW5jZSBwb3NpdGl2ZXMgYW5kIGltcHJvdmVtZW50czogSGlnaGxpZ2h0IHN0cmVuZ3RocyBhbmQgYXJlYXMgdG8gZ3Jvdy4KNS4gQmUgcmVzcGVjdGZ1bCBhbmQgY29uc3RydWN0aXZlOiBBc3N1bWUgcG9zaXRpdmUgaW50ZW50IGFuZCBvZmZlciBzb2x1dGlvbnMuCjYuIFVzZSBvYmplY3RpdmUgY3JpdGVyaWE6IFJlZmVyZW5jZSBnb2FscywgbWV0cmljcywgb3IgZXhwZWN0YXRpb25zIHdoZXJlIHBvc3NpYmxlLgo3LiBTdWdnZXN0IG5leHQgc3RlcHM6IFJlY29tbWVuZCBhY3Rpb25hYmxlIHdheXMgdG8gaW1wcm92ZS4KOC4gUHJvb2ZyZWFkOiBDaGVjayB0b25lLCBncmFtbWFyLCBhbmQgY2xhcml0eSBiZWZvcmUgc3VibWl0dGluZy4K",
|
||||
"mimeType": "text/plain"}}], "role": "user"}], "generationConfig": {}}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
@@ -12,7 +13,7 @@ interactions:
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '132'
|
||||
- '976'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
@@ -26,27 +27,28 @@ interactions:
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
|
||||
[\n {\n \"text\": \"Please provide the text file so I
|
||||
can summarize it for you. I need the content of the file to be able to understand
|
||||
and summarize it in one sentence.\\n\"\n }\n ],\n \"role\":
|
||||
[\n {\n \"text\": \"The text file outlines guidelines
|
||||
for providing effective feedback, emphasizing clarity, specificity, a balance
|
||||
of positive and constructive criticism, respect, objectivity, actionable suggestions,
|
||||
and careful proofreading.\\n\"\n }\n ],\n \"role\":
|
||||
\"model\"\n },\n \"finishReason\": \"STOP\",\n \"avgLogprobs\":
|
||||
-0.17782547979643851\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
|
||||
11,\n \"candidatesTokenCount\": 33,\n \"totalTokenCount\": 44,\n \"promptTokensDetails\":
|
||||
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 11\n
|
||||
-0.17109338442484537\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
|
||||
136,\n \"candidatesTokenCount\": 36,\n \"totalTokenCount\": 172,\n \"promptTokensDetails\":
|
||||
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 136\n
|
||||
\ }\n ],\n \"candidatesTokensDetails\": [\n {\n \"modality\":
|
||||
\"TEXT\",\n \"tokenCount\": 33\n }\n ]\n },\n \"modelVersion\":
|
||||
\"gemini-2.0-flash\",\n \"responseId\": \"b-dyabKwN8a9jrEP7JT1yAo\"\n}\n"
|
||||
\"TEXT\",\n \"tokenCount\": 36\n }\n ]\n },\n \"modelVersion\":
|
||||
\"gemini-2.0-flash\",\n \"responseId\": \"wxZzaYaiGYG2_uMPtMjFiAw\"\n}\n"
|
||||
headers:
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
|
||||
Content-Type:
|
||||
- application/json; charset=UTF-8
|
||||
Date:
|
||||
- Fri, 23 Jan 2026 03:13:52 GMT
|
||||
- Fri, 23 Jan 2026 06:35:48 GMT
|
||||
Server:
|
||||
- scaffolding on HTTPServer2
|
||||
Server-Timing:
|
||||
- gfet4t7; dur=631
|
||||
- gfet4t7; dur=675
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
Vary:
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"contents": [{"parts": [{"text": "Summarize what this text says in one
|
||||
sentence."}, {"inlineData": {"data": "UmV2aWV3IEd1aWRlbGluZXMKCjEuIEJlIGNsZWFyIGFuZCBjb25jaXNlOiBXcml0ZSBmZWVkYmFjayB0aGF0IGlzIGVhc3kgdG8gdW5kZXJzdGFuZC4KMi4gRm9jdXMgb24gYmVoYXZpb3IgYW5kIG91dGNvbWVzOiBEZXNjcmliZSB3aGF0IGhhcHBlbmVkIGFuZCB3aHkgaXQgbWF0dGVycy4KMy4gQmUgc3BlY2lmaWM6IFByb3ZpZGUgZXhhbXBsZXMgdG8gc3VwcG9ydCB5b3VyIHBvaW50cy4KNC4gQmFsYW5jZSBwb3NpdGl2ZXMgYW5kIGltcHJvdmVtZW50czogSGlnaGxpZ2h0IHN0cmVuZ3RocyBhbmQgYXJlYXMgdG8gZ3Jvdy4KNS4gQmUgcmVzcGVjdGZ1bCBhbmQgY29uc3RydWN0aXZlOiBBc3N1bWUgcG9zaXRpdmUgaW50ZW50IGFuZCBvZmZlciBzb2x1dGlvbnMuCjYuIFVzZSBvYmplY3RpdmUgY3JpdGVyaWE6IFJlZmVyZW5jZSBnb2FscywgbWV0cmljcywgb3IgZXhwZWN0YXRpb25zIHdoZXJlIHBvc3NpYmxlLgo3LiBTdWdnZXN0IG5leHQgc3RlcHM6IFJlY29tbWVuZCBhY3Rpb25hYmxlIHdheXMgdG8gaW1wcm92ZS4KOC4gUHJvb2ZyZWFkOiBDaGVjayB0b25lLCBncmFtbWFyLCBhbmQgY2xhcml0eSBiZWZvcmUgc3VibWl0dGluZy4K",
|
||||
"mimeType": "text/plain"}}], "role": "user"}], "generationConfig": {}}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- '*/*'
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '971'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- generativelanguage.googleapis.com
|
||||
x-goog-api-client:
|
||||
- google-genai-sdk/1.49.0 gl-python/3.12.10
|
||||
x-goog-api-key:
|
||||
- X-GOOG-API-KEY-XXX
|
||||
method: POST
|
||||
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
|
||||
[\n {\n \"text\": \"Effective review feedback should be
|
||||
clear, specific, balanced, respectful, and constructive, focusing on behaviors
|
||||
and outcomes with examples, objective criteria, and suggested next steps,
|
||||
ensuring it is proofread for clarity.\\n\"\n }\n ],\n \"role\":
|
||||
\"model\"\n },\n \"finishReason\": \"STOP\",\n \"avgLogprobs\":
|
||||
-0.35489303309743\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
|
||||
135,\n \"candidatesTokenCount\": 41,\n \"totalTokenCount\": 176,\n \"promptTokensDetails\":
|
||||
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 135\n
|
||||
\ }\n ],\n \"candidatesTokensDetails\": [\n {\n \"modality\":
|
||||
\"TEXT\",\n \"tokenCount\": 41\n }\n ]\n },\n \"modelVersion\":
|
||||
\"gemini-2.0-flash\",\n \"responseId\": \"xBZzaY2tCsa9jrEP7JT1yAo\"\n}\n"
|
||||
headers:
|
||||
Alt-Svc:
|
||||
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
|
||||
Content-Type:
|
||||
- application/json; charset=UTF-8
|
||||
Date:
|
||||
- Fri, 23 Jan 2026 06:35:48 GMT
|
||||
Server:
|
||||
- scaffolding on HTTPServer2
|
||||
Server-Timing:
|
||||
- gfet4t7; dur=732
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
Vary:
|
||||
- Origin
|
||||
- X-Origin
|
||||
- Referer
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
X-Frame-Options:
|
||||
- X-FRAME-OPTIONS-XXX
|
||||
X-XSS-Protection:
|
||||
- '0'
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
File diff suppressed because one or more lines are too long
@@ -18,6 +18,7 @@ from crewai_files import (
|
||||
VideoFile,
|
||||
format_multimodal_content,
|
||||
)
|
||||
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
|
||||
|
||||
|
||||
# Path to test data files
|
||||
@@ -559,6 +560,153 @@ class TestGenericFileIntegration:
|
||||
|
||||
response = llm.call(messages)
|
||||
|
||||
assert response
|
||||
assert isinstance(response, str)
|
||||
assert len(response) > 0
|
||||
|
||||
|
||||
def _build_multimodal_message_with_upload(
    llm: LLM, prompt: str, files: dict
) -> tuple[list[dict], list[dict]]:
    """Build an Anthropic-formatted multimodal message that prefers uploads.

    Each file is resolved for the "anthropic" provider with
    ``prefer_upload=True`` and formatted with ``AnthropicFormatter``; files
    whose formatted block is ``None`` are left out.

    Note: OpenAI Chat Completions API only supports file_id for PDFs via
    type="file", not for images; image file_id support requires the
    Responses API (type="input_image"). This helper therefore exercises
    file_id uploads through Anthropic.

    Args:
        llm: LLM used to format the text part of the message.
        prompt: Text prompt placed first in the message content.
        files: Mapping whose values are the files to attach.

    Returns:
        Tuple of (messages, content_blocks) where content_blocks can be
        inspected to verify file_id was used.
    """
    from crewai_files.formatting.anthropic import AnthropicFormatter

    resolver = FileResolver(config=FileResolverConfig(prefer_upload=True))
    formatter = AnthropicFormatter()

    # Resolve and format lazily, in file order, then drop unformattable files.
    formatted = (
        formatter.format_block(file, resolver.resolve(file, "anthropic"))
        for file in files.values()
    )
    content_blocks = [block for block in formatted if block is not None]

    user_content = [llm.format_text_content(prompt), *content_blocks]
    return [{"role": "user", "content": user_content}], content_blocks
|
||||
|
||||
|
||||
def _build_responses_message_with_upload(
    llm: LLM, prompt: str, files: dict
) -> tuple[list[dict], list[dict]]:
    """Build a Responses API message whose files are resolved with uploads preferred.

    Each file is resolved for the "openai" provider with
    ``prefer_upload=True`` and formatted with ``OpenAIResponsesFormatter``.
    The Responses API supports file_id for images via type="input_image".

    Args:
        llm: Accepted for parity with the other message builders; not used.
        prompt: Text prompt placed first as an ``input_text`` block.
        files: Mapping whose values are the files to attach.

    Returns:
        Tuple of (messages, content_blocks) where content_blocks can be
        inspected to verify file_id was used.
    """
    from crewai_files.formatting import OpenAIResponsesFormatter

    resolver = FileResolver(config=FileResolverConfig(prefer_upload=True))

    content_blocks = [
        OpenAIResponsesFormatter.format_block(
            resolver.resolve(file, "openai"), file.content_type
        )
        for file in files.values()
    ]

    user_content = [{"type": "input_text", "text": prompt}, *content_blocks]
    return [{"role": "user", "content": user_content}], content_blocks
|
||||
|
||||
|
||||
class TestAnthropicFileUploadIntegration:
    """Integration tests for Anthropic multimodal calls using file_id uploads.

    Anthropic is used here because OpenAI Chat Completions only accepts
    file_id references for PDFs (type="file"), not images, whereas Anthropic
    accepts file_id for all content types including images. Image file_id
    with OpenAI goes through the Responses API (type="input_image").
    """

    @pytest.mark.vcr()
    def test_describe_image_with_file_id(self, test_image_bytes: bytes) -> None:
        """Anthropic describes an image that was uploaded via the Files API."""
        llm = LLM(model="anthropic/claude-3-5-haiku-20241022")
        messages, content_blocks = _build_multimodal_message_with_upload(
            llm,
            "Describe this image in one sentence. Be brief.",
            {"image": ImageFile(source=test_image_bytes)},
        )

        # The single block must reference an uploaded file, not inline base64.
        assert len(content_blocks) == 1
        source = content_blocks[0].get("source", {})
        source_type = source.get("type")
        assert source_type == "file", (
            f"Expected source type 'file' for file_id upload, got '{source_type}'. "
            "This test verifies file_id uploads work - if falling back to base64, "
            "check that the Anthropic Files API uploader is working correctly."
        )
        assert "file_id" in source, "Expected file_id in source for file_id upload"

        response = llm.call(messages)

        assert response
        assert isinstance(response, str)
        assert len(response) > 0
|
||||
|
||||
|
||||
class TestOpenAIResponsesFileUploadIntegration:
    """Integration tests for the OpenAI Responses API using file_id uploads.

    The Responses API accepts file_id for images via type="input_image",
    unlike Chat Completions, which only accepts file_id for PDFs.
    """

    @pytest.mark.vcr()
    def test_describe_image_with_file_id(self, test_image_bytes: bytes) -> None:
        """The Responses API describes an image uploaded via the Files API."""
        llm = LLM(model="openai/gpt-4o-mini", api="responses")
        messages, content_blocks = _build_responses_message_with_upload(
            llm,
            "Describe this image in one sentence. Be brief.",
            {"image": ImageFile(source=test_image_bytes)},
        )

        # The single block must be an input_image that carries a file_id.
        assert len(content_blocks) == 1
        block = content_blocks[0]
        block_type = block.get("type")
        assert block_type == "input_image", (
            f"Expected type 'input_image' for Responses API, got '{block_type}'. "
            "This test verifies file_id uploads work with the Responses API."
        )
        assert "file_id" in block, "Expected file_id in block for file_id upload"

        response = llm.call(messages)

        assert response
        assert isinstance(response, str)
        assert len(response) > 0
|
||||
Reference in New Issue
Block a user