feat(files): add file_id upload support and text file handling

- Add VCR patch for binary request bodies (base64 encoding fallback)
- Add generate_filename() utility for UUID-based filenames with extension
- Add OpenAIResponsesFormatter for Responses API (input_image, input_file)
- Fix OpenAI uploader to use 'vision' purpose for images
- Fix Anthropic uploader to use tuple format (filename, content, content_type)
- Add TextConstraints and text support for Gemini
- Add file_id upload integration tests for Anthropic and OpenAI Responses API
Greyson LaLonde
2026-01-23 01:57:29 -05:00
parent 7c9ce9ccd8
commit 4ab53c0726
14 changed files with 833 additions and 44 deletions

View File

@@ -1,5 +1,6 @@
"""Pytest configuration for crewAI workspace."""
import base64
from collections.abc import Generator
import gzip
import os
@@ -10,6 +11,7 @@ from typing import Any
from dotenv import load_dotenv
import pytest
from vcr.request import Request # type: ignore[import-untyped]
import vcr.stubs.httpx_stubs as httpx_stubs # type: ignore[import-untyped]
env_test_path = Path(__file__).parent / ".env.test"
@@ -17,6 +19,25 @@ load_dotenv(env_test_path, override=True)
load_dotenv(override=True)
def _patched_make_vcr_request(httpx_request: Any, **kwargs: Any) -> Any:
"""Patched version of VCR's _make_vcr_request that handles binary content.
The original implementation fails on binary request bodies (like file uploads)
because it assumes all content can be decoded as UTF-8.
"""
raw_body = httpx_request.read()
try:
body = raw_body.decode("utf-8")
except UnicodeDecodeError:
body = base64.b64encode(raw_body).decode("ascii")
uri = str(httpx_request.url)
headers = dict(httpx_request.headers)
return Request(httpx_request.method, uri, body, headers)
httpx_stubs._make_vcr_request = _patched_make_vcr_request
@pytest.fixture(autouse=True, scope="function")
def cleanup_event_handlers() -> Generator[None, Any, None]:
"""Clean up event bus handlers after each test to prevent test pollution."""

View File

@@ -54,7 +54,7 @@ class FileOperationMetrics:
}
if self.filename:
result["filename"] = self.filename
result["file_name"] = self.filename
if self.provider:
result["provider"] = self.provider
if self.size_bytes is not None:

View File

@@ -64,6 +64,21 @@ def _fallback_content_type(filename: str | None) -> str:
return "application/octet-stream"
def generate_filename(content_type: str) -> str:
"""Generate a UUID-based filename with extension from content type.
Args:
content_type: MIME type to derive extension from.
Returns:
Filename in format "{uuid}{ext}" where ext includes the dot.
"""
import uuid
ext = mimetypes.guess_extension(content_type) or ""
return f"{uuid.uuid4()}{ext}"
def detect_content_type(data: bytes, filename: str | None = None) -> str:
"""Detect MIME type from file content.

View File

@@ -4,9 +4,11 @@ from crewai_files.formatting.api import (
aformat_multimodal_content,
format_multimodal_content,
)
from crewai_files.formatting.openai import OpenAIResponsesFormatter
__all__ = [
"OpenAIResponsesFormatter",
"aformat_multimodal_content",
"format_multimodal_content",
]

View File

@@ -186,6 +186,11 @@ def _get_supported_types(
supported.append("audio/")
if constraints.video is not None:
supported.append("video/")
if constraints.text is not None:
supported.append("text/")
supported.append("application/json")
supported.append("application/xml")
supported.append("application/x-yaml")
return supported
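
How the expanded list is consumed is not shown in this hunk; a hypothetical membership check against its mix of prefix entries ("text/") and full types ("application/json") might look like:

supported = ["text/", "application/json", "application/xml", "application/x-yaml"]
content_type = "text/markdown"

is_supported = any(
    content_type == entry or (entry.endswith("/") and content_type.startswith(entry))
    for entry in supported
)
print(is_supported)  # True: "text/markdown" matches the "text/" prefix entry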

View File

@@ -14,6 +14,95 @@ from crewai_files.core.resolved import (
)
class OpenAIResponsesFormatter:
"""Formats resolved files into OpenAI Responses API content blocks.
The Responses API uses a different format than Chat Completions:
- Images use `type: "input_image"` with `file_id` or `image_url`
- PDFs use `type: "input_file"` with `file_id`, `file_url`, or `file_data`
"""
@staticmethod
def format_block(resolved: ResolvedFileType, content_type: str) -> dict[str, Any]:
"""Format a resolved file into an OpenAI Responses API content block.
Args:
resolved: Resolved file.
content_type: MIME type of the file.
Returns:
Content block dict.
Raises:
TypeError: If resolved type is not supported.
"""
is_image = content_type.startswith("image/")
is_pdf = content_type == "application/pdf"
if isinstance(resolved, FileReference):
if is_image:
return {
"type": "input_image",
"file_id": resolved.file_id,
}
if is_pdf:
return {
"type": "input_file",
"file_id": resolved.file_id,
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
if isinstance(resolved, UrlReference):
if is_image:
return {
"type": "input_image",
"image_url": resolved.url,
}
if is_pdf:
return {
"type": "input_file",
"file_url": resolved.url,
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
if isinstance(resolved, InlineBase64):
if is_image:
return {
"type": "input_image",
"image_url": f"data:{resolved.content_type};base64,{resolved.data}",
}
if is_pdf:
return {
"type": "input_file",
"file_data": f"data:{resolved.content_type};base64,{resolved.data}",
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
if isinstance(resolved, InlineBytes):
data = base64.b64encode(resolved.data).decode("ascii")
if is_image:
return {
"type": "input_image",
"image_url": f"data:{resolved.content_type};base64,{data}",
}
if is_pdf:
return {
"type": "input_file",
"file_data": f"data:{resolved.content_type};base64,{data}",
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
class OpenAIFormatter:
"""Formats resolved files into OpenAI content blocks."""

View File

@@ -7,6 +7,7 @@ from typing import Literal
from crewai_files.core.types import (
AudioMimeType,
ImageMimeType,
TextContentType,
VideoMimeType,
)
@@ -72,6 +73,27 @@ GEMINI_VIDEO_FORMATS: tuple[VideoMimeType, ...] = (
"video/x-flv",
)
DEFAULT_TEXT_FORMATS: tuple[TextContentType, ...] = (
"text/plain",
"text/markdown",
"text/csv",
"application/json",
"text/xml",
"text/html",
)
GEMINI_TEXT_FORMATS: tuple[TextContentType, ...] = (
"text/plain",
"text/markdown",
"text/csv",
"application/json",
"application/xml",
"text/xml",
"application/x-yaml",
"text/yaml",
"text/html",
)
@dataclass(frozen=True)
class ImageConstraints:
@@ -135,6 +157,19 @@ class VideoConstraints:
supported_formats: tuple[VideoMimeType, ...] = DEFAULT_VIDEO_FORMATS
@dataclass(frozen=True)
class TextConstraints:
"""Constraints for text files.
Attributes:
max_size_bytes: Maximum file size in bytes.
supported_formats: Supported text MIME types.
"""
max_size_bytes: int
supported_formats: tuple[TextContentType, ...] = DEFAULT_TEXT_FORMATS
@dataclass(frozen=True)
class ProviderConstraints:
"""Complete set of constraints for a provider.
@@ -145,6 +180,7 @@ class ProviderConstraints:
pdf: PDF file constraints.
audio: Audio file constraints.
video: Video file constraints.
text: Text file constraints.
general_max_size_bytes: Maximum size for any file type.
supports_file_upload: Whether the provider supports file upload APIs.
file_upload_threshold_bytes: Size threshold above which to use file upload.
@@ -156,6 +192,7 @@ class ProviderConstraints:
pdf: PDFConstraints | None = None
audio: AudioConstraints | None = None
video: VideoConstraints | None = None
text: TextConstraints | None = None
general_max_size_bytes: int | None = None
supports_file_upload: bool = False
file_upload_threshold_bytes: int | None = None
@@ -213,6 +250,10 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
max_duration_seconds=3600, # 1 hour at default resolution
supported_formats=GEMINI_VIDEO_FORMATS,
),
text=TextConstraints(
max_size_bytes=104_857_600,
supported_formats=GEMINI_TEXT_FORMATS,
),
supports_file_upload=True,
file_upload_threshold_bytes=20_971_520,
supports_url_references=True,
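
A hypothetical pre-upload check against the new text constraints (assumes data: bytes and content_type: str are in scope; the check itself is illustrative, not from this diff):

constraints = GEMINI_CONSTRAINTS  # defined above in this file

if constraints.text is not None:
    ok = (
        len(data) <= constraints.text.max_size_bytes  # 104_857_600 = 100 MiB
        and content_type in constraints.text.supported_formats
    )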

View File

@@ -2,11 +2,11 @@
from __future__ import annotations
import io
import logging
import os
from typing import Any
from crewai_files.core.sources import generate_filename
from crewai_files.core.types import FileInput
from crewai_files.processing.exceptions import classify_upload_error
from crewai_files.uploaders.base import FileUploader, UploadResult
@@ -91,17 +91,14 @@ class AnthropicFileUploader(FileUploader):
client = self._get_client()
content = file.read()
file_purpose = purpose or "user_upload"
file_data = io.BytesIO(content)
logger.info(
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
)
uploaded_file = client.files.create(
file=(file.filename, file_data, file.content_type),
purpose=file_purpose,
filename = file.filename or generate_filename(file.content_type)
uploaded_file = client.beta.files.upload(
file=(filename, content, file.content_type),
)
logger.info(f"Uploaded to Anthropic: {uploaded_file.id}")
@@ -129,7 +126,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_client()
client.files.delete(file_id=file_id)
client.beta.files.delete(file_id=file_id)
logger.info(f"Deleted Anthropic file: {file_id}")
return True
except Exception as e:
@@ -147,7 +144,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_client()
file_info = client.files.retrieve(file_id=file_id)
file_info = client.beta.files.retrieve(file_id=file_id)
return {
"id": file_info.id,
"filename": file_info.filename,
@@ -167,7 +164,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_client()
files = client.files.list()
files = client.beta.files.list()
return [
{
"id": f.id,
@@ -202,17 +199,14 @@ class AnthropicFileUploader(FileUploader):
client = self._get_async_client()
content = await file.aread()
file_purpose = purpose or "user_upload"
file_data = io.BytesIO(content)
logger.info(
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
)
uploaded_file = await client.files.create(
file=(file.filename, file_data, file.content_type),
purpose=file_purpose,
filename = file.filename or generate_filename(file.content_type)
uploaded_file = await client.beta.files.upload(
file=(filename, content, file.content_type),
)
logger.info(f"Uploaded to Anthropic: {uploaded_file.id}")
@@ -240,7 +234,7 @@ class AnthropicFileUploader(FileUploader):
"""
try:
client = self._get_async_client()
await client.files.delete(file_id=file_id)
await client.beta.files.delete(file_id=file_id)
logger.info(f"Deleted Anthropic file: {file_id}")
return True
except Exception as e:
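
In isolation, the fixed upload call now passes raw bytes in a (filename, content, content_type) tuple to the beta Files API, rather than wrapping them in io.BytesIO for client.files.create() with a purpose argument. A sketch, assuming client and pdf_bytes are in scope:

uploaded = client.beta.files.upload(
    file=("report.pdf", pdf_bytes, "application/pdf"),
)
print(uploaded.id)  # e.g. "file_011CNha8iCJcU1wXNR6q4V8w" (illustrative)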

View File

@@ -9,7 +9,7 @@ import os
from typing import Any
from crewai_files.core.constants import DEFAULT_UPLOAD_CHUNK_SIZE, FILES_API_MAX_SIZE
from crewai_files.core.sources import FileBytes, FilePath, FileStream
from crewai_files.core.sources import FileBytes, FilePath, FileStream, generate_filename
from crewai_files.core.types import FileInput
from crewai_files.processing.exceptions import (
PermanentUploadError,
@@ -22,6 +22,27 @@ from crewai_files.uploaders.base import FileUploader, UploadResult
logger = logging.getLogger(__name__)
def _get_purpose_for_content_type(content_type: str, purpose: str | None) -> str:
"""Get the appropriate purpose for a file based on content type.
OpenAI Files API requires different purposes for different file types:
- Images (for Responses API vision): "vision"
- PDFs and other documents: "user_data"
Args:
content_type: MIME type of the file.
purpose: Optional explicit purpose override.
Returns:
The purpose string to use for upload.
"""
if purpose is not None:
return purpose
if content_type.startswith("image/"):
return "vision"
return "user_data"
def _get_file_size(file: FileInput) -> int | None:
"""Get file size without reading content if possible.
@@ -219,13 +240,14 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_client()
file_purpose = purpose or "user_data"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
file_data = io.BytesIO(content)
file_data.name = file.filename or "file"
file_data.name = filename
logger.info(
f"Uploading file '{file.filename}' to OpenAI Files API ({len(content)} bytes)"
f"Uploading file '{filename}' to OpenAI Files API ({len(content)} bytes)"
)
uploaded_file = client.files.create(
@@ -254,8 +276,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
file_size = len(content)
logger.info(
@@ -329,8 +351,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
logger.info(
f"Uploading file '{filename}' to OpenAI Uploads API (streaming) "
@@ -496,10 +518,10 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_async_client()
file_purpose = purpose or "user_data"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
file_data = io.BytesIO(content)
file_data.name = file.filename or "file"
file_data.name = file.filename or generate_filename(file.content_type)
logger.info(
f"Uploading file '{file.filename}' to OpenAI Files API ({len(content)} bytes)"
@@ -531,8 +553,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_async_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
file_size = len(content)
logger.info(
@@ -606,8 +628,8 @@ class OpenAIFileUploader(FileUploader):
UploadResult with the file ID and metadata.
"""
client = self._get_async_client()
file_purpose = purpose or "user_data"
filename = file.filename or "file"
file_purpose = _get_purpose_for_content_type(file.content_type, purpose)
filename = file.filename or generate_filename(file.content_type)
logger.info(
f"Uploading file '{filename}' to OpenAI Uploads API (streaming) "

File diff suppressed because one or more lines are too long

View File

@@ -1,7 +1,8 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "Summarize what this text file says in
one sentence."}], "role": "user"}], "generationConfig": {}}'
one sentence."}, {"inlineData": {"data": "UmV2aWV3IEd1aWRlbGluZXMKCjEuIEJlIGNsZWFyIGFuZCBjb25jaXNlOiBXcml0ZSBmZWVkYmFjayB0aGF0IGlzIGVhc3kgdG8gdW5kZXJzdGFuZC4KMi4gRm9jdXMgb24gYmVoYXZpb3IgYW5kIG91dGNvbWVzOiBEZXNjcmliZSB3aGF0IGhhcHBlbmVkIGFuZCB3aHkgaXQgbWF0dGVycy4KMy4gQmUgc3BlY2lmaWM6IFByb3ZpZGUgZXhhbXBsZXMgdG8gc3VwcG9ydCB5b3VyIHBvaW50cy4KNC4gQmFsYW5jZSBwb3NpdGl2ZXMgYW5kIGltcHJvdmVtZW50czogSGlnaGxpZ2h0IHN0cmVuZ3RocyBhbmQgYXJlYXMgdG8gZ3Jvdy4KNS4gQmUgcmVzcGVjdGZ1bCBhbmQgY29uc3RydWN0aXZlOiBBc3N1bWUgcG9zaXRpdmUgaW50ZW50IGFuZCBvZmZlciBzb2x1dGlvbnMuCjYuIFVzZSBvYmplY3RpdmUgY3JpdGVyaWE6IFJlZmVyZW5jZSBnb2FscywgbWV0cmljcywgb3IgZXhwZWN0YXRpb25zIHdoZXJlIHBvc3NpYmxlLgo3LiBTdWdnZXN0IG5leHQgc3RlcHM6IFJlY29tbWVuZCBhY3Rpb25hYmxlIHdheXMgdG8gaW1wcm92ZS4KOC4gUHJvb2ZyZWFkOiBDaGVjayB0b25lLCBncmFtbWFyLCBhbmQgY2xhcml0eSBiZWZvcmUgc3VibWl0dGluZy4K",
"mimeType": "text/plain"}}], "role": "user"}], "generationConfig": {}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -12,7 +13,7 @@ interactions:
connection:
- keep-alive
content-length:
- '132'
- '976'
content-type:
- application/json
host:
@@ -26,27 +27,28 @@ interactions:
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"Please provide the text file so I
can summarize it for you. I need the content of the file to be able to understand
and summarize it in one sentence.\\n\"\n }\n ],\n \"role\":
[\n {\n \"text\": \"The text file outlines guidelines
for providing effective feedback, emphasizing clarity, specificity, a balance
of positive and constructive criticism, respect, objectivity, actionable suggestions,
and careful proofreading.\\n\"\n }\n ],\n \"role\":
\"model\"\n },\n \"finishReason\": \"STOP\",\n \"avgLogprobs\":
-0.17782547979643851\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
11,\n \"candidatesTokenCount\": 33,\n \"totalTokenCount\": 44,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 11\n
-0.17109338442484537\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
136,\n \"candidatesTokenCount\": 36,\n \"totalTokenCount\": 172,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 136\n
\ }\n ],\n \"candidatesTokensDetails\": [\n {\n \"modality\":
\"TEXT\",\n \"tokenCount\": 33\n }\n ]\n },\n \"modelVersion\":
\"gemini-2.0-flash\",\n \"responseId\": \"b-dyabKwN8a9jrEP7JT1yAo\"\n}\n"
\"TEXT\",\n \"tokenCount\": 36\n }\n ]\n },\n \"modelVersion\":
\"gemini-2.0-flash\",\n \"responseId\": \"wxZzaYaiGYG2_uMPtMjFiAw\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Fri, 23 Jan 2026 03:13:52 GMT
- Fri, 23 Jan 2026 06:35:48 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=631
- gfet4t7; dur=675
Transfer-Encoding:
- chunked
Vary:

View File

@@ -0,0 +1,67 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "Summarize what this text says in one
sentence."}, {"inlineData": {"data": "UmV2aWV3IEd1aWRlbGluZXMKCjEuIEJlIGNsZWFyIGFuZCBjb25jaXNlOiBXcml0ZSBmZWVkYmFjayB0aGF0IGlzIGVhc3kgdG8gdW5kZXJzdGFuZC4KMi4gRm9jdXMgb24gYmVoYXZpb3IgYW5kIG91dGNvbWVzOiBEZXNjcmliZSB3aGF0IGhhcHBlbmVkIGFuZCB3aHkgaXQgbWF0dGVycy4KMy4gQmUgc3BlY2lmaWM6IFByb3ZpZGUgZXhhbXBsZXMgdG8gc3VwcG9ydCB5b3VyIHBvaW50cy4KNC4gQmFsYW5jZSBwb3NpdGl2ZXMgYW5kIGltcHJvdmVtZW50czogSGlnaGxpZ2h0IHN0cmVuZ3RocyBhbmQgYXJlYXMgdG8gZ3Jvdy4KNS4gQmUgcmVzcGVjdGZ1bCBhbmQgY29uc3RydWN0aXZlOiBBc3N1bWUgcG9zaXRpdmUgaW50ZW50IGFuZCBvZmZlciBzb2x1dGlvbnMuCjYuIFVzZSBvYmplY3RpdmUgY3JpdGVyaWE6IFJlZmVyZW5jZSBnb2FscywgbWV0cmljcywgb3IgZXhwZWN0YXRpb25zIHdoZXJlIHBvc3NpYmxlLgo3LiBTdWdnZXN0IG5leHQgc3RlcHM6IFJlY29tbWVuZCBhY3Rpb25hYmxlIHdheXMgdG8gaW1wcm92ZS4KOC4gUHJvb2ZyZWFkOiBDaGVjayB0b25lLCBncmFtbWFyLCBhbmQgY2xhcml0eSBiZWZvcmUgc3VibWl0dGluZy4K",
"mimeType": "text/plain"}}], "role": "user"}], "generationConfig": {}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '971'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.12.10
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
response:
body:
string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\":
[\n {\n \"text\": \"Effective review feedback should be
clear, specific, balanced, respectful, and constructive, focusing on behaviors
and outcomes with examples, objective criteria, and suggested next steps,
ensuring it is proofread for clarity.\\n\"\n }\n ],\n \"role\":
\"model\"\n },\n \"finishReason\": \"STOP\",\n \"avgLogprobs\":
-0.35489303309743\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\":
135,\n \"candidatesTokenCount\": 41,\n \"totalTokenCount\": 176,\n \"promptTokensDetails\":
[\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 135\n
\ }\n ],\n \"candidatesTokensDetails\": [\n {\n \"modality\":
\"TEXT\",\n \"tokenCount\": 41\n }\n ]\n },\n \"modelVersion\":
\"gemini-2.0-flash\",\n \"responseId\": \"xBZzaY2tCsa9jrEP7JT1yAo\"\n}\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Type:
- application/json; charset=UTF-8
Date:
- Fri, 23 Jan 2026 06:35:48 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=732
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1

View File

@@ -18,6 +18,7 @@ from crewai_files import (
VideoFile,
format_multimodal_content,
)
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
# Path to test data files
@@ -559,6 +560,153 @@ class TestGenericFileIntegration:
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
def _build_multimodal_message_with_upload(
llm: LLM, prompt: str, files: dict
) -> tuple[list[dict], list[dict]]:
"""Build a multimodal message using file_id uploads instead of inline base64.
Note: The OpenAI Chat Completions API only supports file_id for PDFs via
type="file", not for images. Image file_id support requires the Responses
API (type="input_image"). Since crewAI uses Chat Completions, we test
file_id uploads with Anthropic, which supports file_id for all types.
Returns:
Tuple of (messages, content_blocks) where content_blocks can be inspected
to verify file_id was used.
"""
from crewai_files.formatting.anthropic import AnthropicFormatter
config = FileResolverConfig(prefer_upload=True)
resolver = FileResolver(config=config)
formatter = AnthropicFormatter()
content_blocks = []
for file in files.values():
resolved = resolver.resolve(file, "anthropic")
block = formatter.format_block(file, resolved)
if block is not None:
content_blocks.append(block)
messages = [
{
"role": "user",
"content": [
llm.format_text_content(prompt),
*content_blocks,
],
}
]
return messages, content_blocks
def _build_responses_message_with_upload(
llm: LLM, prompt: str, files: dict
) -> tuple[list[dict], list[dict]]:
"""Build a Responses API message using file_id uploads.
The Responses API supports file_id for images via type="input_image".
Returns:
Tuple of (messages, content_blocks) where content_blocks can be inspected
to verify file_id was used.
"""
from crewai_files.formatting import OpenAIResponsesFormatter
config = FileResolverConfig(prefer_upload=True)
resolver = FileResolver(config=config)
content_blocks = []
for file in files.values():
resolved = resolver.resolve(file, "openai")
block = OpenAIResponsesFormatter.format_block(resolved, file.content_type)
content_blocks.append(block)
messages = [
{
"role": "user",
"content": [
{"type": "input_text", "text": prompt},
*content_blocks,
],
}
]
return messages, content_blocks
class TestAnthropicFileUploadIntegration:
"""Integration tests for Anthropic multimodal with file_id uploads.
We test file_id uploads with Anthropic because the OpenAI Chat Completions
API only supports file_id references for PDFs (type="file"), not images.
OpenAI's Responses API supports image file_id (type="input_image"), but
crewAI currently uses Chat Completions. Anthropic supports file_id for
all content types, including images.
"""
@pytest.mark.vcr()
def test_describe_image_with_file_id(self, test_image_bytes: bytes) -> None:
"""Test Anthropic can describe an image uploaded via Files API."""
llm = LLM(model="anthropic/claude-3-5-haiku-20241022")
files = {"image": ImageFile(source=test_image_bytes)}
messages, content_blocks = _build_multimodal_message_with_upload(
llm,
"Describe this image in one sentence. Be brief.",
files,
)
# Verify we're using file_id, not base64
assert len(content_blocks) == 1
source = content_blocks[0].get("source", {})
assert source.get("type") == "file", (
f"Expected source type 'file' for file_id upload, got '{source.get('type')}'. "
"This test verifies file_id uploads work - if falling back to base64, "
"check that the Anthropic Files API uploader is working correctly."
)
assert "file_id" in source, "Expected file_id in source for file_id upload"
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
class TestOpenAIResponsesFileUploadIntegration:
"""Integration tests for OpenAI Responses API with file_id uploads.
The Responses API supports file_id for images via type="input_image",
unlike Chat Completions which only supports file_id for PDFs.
"""
@pytest.mark.vcr()
def test_describe_image_with_file_id(self, test_image_bytes: bytes) -> None:
"""Test OpenAI Responses API can describe an image uploaded via Files API."""
llm = LLM(model="openai/gpt-4o-mini", api="responses")
files = {"image": ImageFile(source=test_image_bytes)}
messages, content_blocks = _build_responses_message_with_upload(
llm,
"Describe this image in one sentence. Be brief.",
files,
)
# Verify we're using file_id with input_image type
assert len(content_blocks) == 1
block = content_blocks[0]
assert block.get("type") == "input_image", (
f"Expected type 'input_image' for Responses API, got '{block.get('type')}'. "
"This test verifies file_id uploads work with the Responses API."
)
assert "file_id" in block, "Expected file_id in block for file_id upload"
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0