refactor: improve multimodal file handling architecture

- Make crewai_files an optional dependency with graceful fallbacks
- Move file formatting from executor to LLM layer (_process_message_files)
- Add files field to LLMMessage type for cleaner message passing
- Add cache_control to Anthropic content blocks for prompt caching
- Clean up formatters: static methods for OpenAI/Gemini, proper error handling
- Remove unused ContentFormatter protocol
- Move test fixtures to lib/crewai-files/tests/fixtures
- Add Azure and Bedrock multimodal integration tests
- Fix mypy errors in crew_agent_executor.py
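
A rough sketch of the resulting flow (LLMMessage, the `files` field, and
_process_message_files are from this change; the prompt, file dict, and
ImageFile usage are illustrative):

    # Executor side: attach raw files to the last user message.
    message: LLMMessage = {
        "role": "user",
        "content": "Summarize the attached chart in one sentence.",
        "files": {"chart": ImageFile(source=chart_png_bytes)},
    }

    # LLM side: _process_message_files() expands `files` into provider-specific
    # content blocks via crewai_files.format_multimodal_content and drops the
    # `files` key before the request is sent. If crewai_files is not installed,
    # the message passes through unchanged.
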
Author: Greyson LaLonde
Date: 2026-01-22 21:55:10 -05:00
Parent: dc015b14f9
Commit: a1cbb2f4e2
31 changed files with 320 additions and 1278 deletions

View File

@@ -8,7 +8,8 @@ from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
InlineBytes,
ResolvedFileType,
UrlReference,
)
from crewai_files.core.types import FileInput
@@ -20,7 +21,7 @@ class AnthropicFormatter:
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
resolved: ResolvedFileType,
) -> dict[str, Any] | None:
"""Format a resolved file into an Anthropic content block.
@@ -43,6 +44,7 @@ class AnthropicFormatter:
"type": "file",
"file_id": resolved.file_id,
},
"cache_control": {"type": "ephemeral"},
}
if isinstance(resolved, UrlReference):
@@ -52,6 +54,7 @@ class AnthropicFormatter:
"type": "url",
"url": resolved.url,
},
"cache_control": {"type": "ephemeral"},
}
if isinstance(resolved, InlineBase64):
@@ -62,17 +65,21 @@ class AnthropicFormatter:
"media_type": resolved.content_type,
"data": resolved.data,
},
"cache_control": {"type": "ephemeral"},
}
data = base64.b64encode(file.read()).decode("ascii")
return {
"type": block_type,
"source": {
"type": "base64",
"media_type": content_type,
"data": data,
},
}
if isinstance(resolved, InlineBytes):
return {
"type": block_type,
"source": {
"type": "base64",
"media_type": resolved.content_type,
"data": base64.b64encode(resolved.data).decode("ascii"),
},
"cache_control": {"type": "ephemeral"},
}
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
@staticmethod
def _get_block_type(content_type: str) -> str | None:
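
For reference, an inline PNG now formats to a block shaped roughly like this
(payload elided; the concrete shape follows the branches above):

    {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": "<base64 payload>",
        },
        "cache_control": {"type": "ephemeral"},
    }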

View File

@@ -274,4 +274,8 @@ def _format_block(
"""
if isinstance(formatter, BedrockFormatter):
return formatter.format_block(file_input, resolved, name=name)
return formatter.format_block(file_input, resolved)
if isinstance(formatter, AnthropicFormatter):
return formatter.format_block(file_input, resolved)
if isinstance(formatter, (OpenAIFormatter, GeminiFormatter)):
return formatter.format_block(resolved)
raise TypeError(f"Unknown formatter type: {type(formatter).__name__}")

View File

@@ -1,28 +0,0 @@
"""Base formatter protocol for provider-specific content blocks."""
from __future__ import annotations
from typing import Any, Protocol
from crewai_files.core.resolved import ResolvedFile
from crewai_files.core.types import FileInput
class ContentFormatter(Protocol):
"""Protocol for formatting resolved files into provider content blocks."""
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
) -> dict[str, Any] | None:
"""Format a resolved file into a provider-specific content block.
Args:
file: Original file input with metadata.
resolved: Resolved file (FileReference, InlineBase64, etc.).
Returns:
Content block dict or None if file type not supported.
"""
...

View File

@@ -2,12 +2,15 @@
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
InlineBytes,
ResolvedFile,
ResolvedFileType,
UrlReference,
)
from crewai_files.core.types import FileInput
@@ -49,7 +52,7 @@ class BedrockFormatter:
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
resolved: ResolvedFileType,
name: str | None = None,
) -> dict[str, Any] | None:
"""Format a resolved file into a Bedrock content block.
@@ -64,15 +67,24 @@ class BedrockFormatter:
"""
content_type = file.content_type
if isinstance(resolved, FileReference) and resolved.file_uri:
if isinstance(resolved, FileReference):
if not resolved.file_uri:
raise ValueError("Bedrock requires file_uri for FileReference (S3 URI)")
return self._format_s3_block(content_type, resolved.file_uri, name)
if isinstance(resolved, InlineBytes):
file_bytes = resolved.data
else:
file_bytes = file.read()
return self._format_bytes_block(content_type, resolved.data, name)
return self._format_bytes_block(content_type, file_bytes, name)
if isinstance(resolved, InlineBase64):
file_bytes = base64.b64decode(resolved.data)
return self._format_bytes_block(content_type, file_bytes, name)
if isinstance(resolved, UrlReference):
raise ValueError(
"Bedrock does not support URL references - resolve to bytes first"
)
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
def _format_s3_block(
self,

View File

@@ -8,32 +8,31 @@ from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
InlineBytes,
ResolvedFileType,
UrlReference,
)
from crewai_files.core.types import FileInput
class GeminiFormatter:
"""Formats resolved files into Gemini content blocks."""
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
) -> dict[str, Any] | None:
@staticmethod
def format_block(resolved: ResolvedFileType) -> dict[str, Any]:
"""Format a resolved file into a Gemini content block.
Args:
file: Original file input with metadata.
resolved: Resolved file.
Returns:
Content block dict or None if not supported.
"""
content_type = file.content_type
Content block dict.
if isinstance(resolved, FileReference) and resolved.file_uri:
Raises:
TypeError: If resolved type is not supported.
"""
if isinstance(resolved, FileReference):
if not resolved.file_uri:
raise ValueError("Gemini requires file_uri for FileReference")
return {
"fileData": {
"mimeType": resolved.content_type,
@@ -44,7 +43,7 @@ class GeminiFormatter:
if isinstance(resolved, UrlReference):
return {
"fileData": {
"mimeType": content_type,
"mimeType": resolved.content_type,
"fileUri": resolved.url,
}
}
@@ -57,10 +56,12 @@ class GeminiFormatter:
}
}
data = base64.b64encode(file.read()).decode("ascii")
return {
"inlineData": {
"mimeType": content_type,
"data": data,
if isinstance(resolved, InlineBytes):
return {
"inlineData": {
"mimeType": resolved.content_type,
"data": base64.b64encode(resolved.data).decode("ascii"),
}
}
}
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")

View File

@@ -8,31 +8,28 @@ from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
ResolvedFile,
InlineBytes,
ResolvedFileType,
UrlReference,
)
from crewai_files.core.types import FileInput
class OpenAIFormatter:
"""Formats resolved files into OpenAI content blocks."""
def format_block(
self,
file: FileInput,
resolved: ResolvedFile,
) -> dict[str, Any] | None:
@staticmethod
def format_block(resolved: ResolvedFileType) -> dict[str, Any]:
"""Format a resolved file into an OpenAI content block.
Args:
file: Original file input with metadata.
resolved: Resolved file.
Returns:
Content block dict or None if not supported.
"""
content_type = file.content_type
Content block dict.
Raises:
TypeError: If resolved type is not supported.
"""
if isinstance(resolved, FileReference):
return {
"type": "file",
@@ -53,8 +50,11 @@ class OpenAIFormatter:
},
}
data = base64.b64encode(file.read()).decode("ascii")
return {
"type": "image_url",
"image_url": {"url": f"data:{content_type};base64,{data}"},
}
if isinstance(resolved, InlineBytes):
data = base64.b64encode(resolved.data).decode("ascii")
return {
"type": "image_url",
"image_url": {"url": f"data:{resolved.content_type};base64,{data}"},
}
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")

View File

Binary image file; 27 KiB before and after.

View File

@@ -3,15 +3,6 @@ from typing import Any
import urllib.request
import warnings
from crewai_files import (
AudioFile,
File,
ImageFile,
PDFFile,
TextFile,
VideoFile,
)
from crewai.agent.core import Agent
from crewai.crew import Crew
from crewai.crews.crew_output import CrewOutput
@@ -83,20 +74,14 @@ _track_install_async()
__all__ = [
"LLM",
"Agent",
"AudioFile",
"BaseLLM",
"Crew",
"CrewOutput",
"File",
"Flow",
"ImageFile",
"Knowledge",
"LLMGuardrail",
"PDFFile",
"Process",
"Task",
"TaskOutput",
"TextFile",
"VideoFile",
"__version__",
]

View File

@@ -10,7 +10,6 @@ from collections.abc import Callable
import logging
from typing import TYPE_CHECKING, Any, Literal, cast
from crewai_files import aformat_multimodal_content, format_multimodal_content
from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
from pydantic_core import CoreSchema, core_schema
@@ -46,7 +45,7 @@ from crewai.utilities.agent_utils import (
track_delegation_if_needed,
)
from crewai.utilities.constants import TRAINING_DATA_FILE
from crewai.utilities.file_store import get_all_files
from crewai.utilities.file_store import aget_all_files, get_all_files
from crewai.utilities.i18n import I18N, get_i18n
from crewai.utilities.printer import Printer
from crewai.utilities.string_utils import sanitize_tool_name
@@ -220,71 +219,41 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
return {"output": formatted_answer.output}
def _inject_multimodal_files(self) -> None:
"""Inject files as multimodal content into messages.
"""Attach files to the last user message for LLM-layer formatting.
For crews with input files and LLMs that support multimodal,
uses crewai_files to process, resolve, and format files into
provider-specific content blocks.
Retrieves crew and task files and attaches them to the message's
`files` field. The LLM layer handles provider-specific formatting.
"""
if not self.crew or not self.task:
return
if not self.llm.supports_multimodal():
return
files = get_all_files(self.crew.id, self.task.id)
if not files:
return
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
content_blocks = format_multimodal_content(files, provider)
if not content_blocks:
return
for i in range(len(self.messages) - 1, -1, -1):
msg = self.messages[i]
if msg.get("role") == "user":
existing_content = msg.get("content", "")
if isinstance(existing_content, str):
msg["content"] = [
self.llm.format_text_content(existing_content),
*content_blocks,
]
msg["files"] = files
break
async def _ainject_multimodal_files(self) -> None:
"""Async inject files as multimodal content into messages.
"""Async attach files to the last user message for LLM-layer formatting.
For crews with input files and LLMs that support multimodal,
uses crewai_files to process, resolve, and format files into
provider-specific content blocks with parallel file resolution.
Retrieves crew and task files and attaches them to the message's
`files` field. The LLM layer handles provider-specific formatting.
"""
if not self.crew or not self.task:
return
if not self.llm.supports_multimodal():
return
files = get_all_files(self.crew.id, self.task.id)
files = await aget_all_files(self.crew.id, self.task.id)
if not files:
return
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
content_blocks = await aformat_multimodal_content(files, provider)
if not content_blocks:
return
for i in range(len(self.messages) - 1, -1, -1):
msg = self.messages[i]
if msg.get("role") == "user":
existing_content = msg.get("content", "")
if isinstance(existing_content, str):
msg["content"] = [
self.llm.format_text_content(existing_content),
*content_blocks,
]
msg["files"] = files
break
def _invoke_loop(self) -> AgentFinish:
@@ -772,7 +741,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if (
original_tool
and hasattr(original_tool, "cache_function")
and original_tool.cache_function
and callable(original_tool.cache_function)
):
should_cache = original_tool.cache_function(
args_dict, raw_result
@@ -803,7 +772,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
error=e,
),
)
elif max_usage_reached:
elif max_usage_reached and original_tool:
# Return error message when max usage limit is reached
result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."

View File

@@ -6,14 +6,6 @@ import asyncio
from collections.abc import Callable, Coroutine, Iterable
from typing import TYPE_CHECKING, Any
from crewai_files import (
AudioFile,
ImageFile,
PDFFile,
TextFile,
VideoFile,
)
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.crews.crew_output import CrewOutput
from crewai.rag.embeddings.types import EmbedderConfig
@@ -27,6 +19,20 @@ from crewai.utilities.streaming import (
from crewai.utilities.types import KickoffInputs
try:
from crewai_files import (
AudioFile,
ImageFile,
PDFFile,
TextFile,
VideoFile,
)
_FILE_TYPES: tuple[type, ...] = (AudioFile, ImageFile, PDFFile, TextFile, VideoFile)
except ImportError:
_FILE_TYPES = ()
if TYPE_CHECKING:
from crewai.crew import Crew
@@ -198,12 +204,14 @@ def _extract_files_from_inputs(inputs: dict[str, Any]) -> dict[str, Any]:
Returns:
Dictionary of extracted file objects.
"""
file_types = (AudioFile, ImageFile, PDFFile, TextFile, VideoFile)
if not _FILE_TYPES:
return {}
files: dict[str, Any] = {}
keys_to_remove: list[str] = []
for key, value in inputs.items():
if isinstance(value, file_types):
if isinstance(value, _FILE_TYPES):
files[key] = value
keys_to_remove.append(key)

View File

@@ -53,6 +53,14 @@ from crewai.utilities.logger_utils import suppress_warnings
from crewai.utilities.string_utils import sanitize_tool_name
try:
from crewai_files import aformat_multimodal_content, format_multimodal_content
HAS_CREWAI_FILES = True
except ImportError:
HAS_CREWAI_FILES = False
if TYPE_CHECKING:
from litellm.exceptions import ContextWindowExceededError
from litellm.litellm_core_utils.get_supported_openai_params import (
@@ -661,12 +669,14 @@ class LLM(BaseLLM):
self,
messages: str | list[LLMMessage],
tools: list[dict[str, BaseTool]] | None = None,
skip_file_processing: bool = False,
) -> dict[str, Any]:
"""Prepare parameters for the completion call.
Args:
messages: Input messages for the LLM
tools: Optional list of tool schemas
skip_file_processing: Skip file processing (used when already done async)
Returns:
Dict[str, Any]: Parameters for the completion call
@@ -674,6 +684,9 @@ class LLM(BaseLLM):
# --- 1) Format messages according to provider requirements
if isinstance(messages, str):
messages = [{"role": "user", "content": messages}]
# --- 1a) Process any file attachments into multimodal content
if not skip_file_processing:
messages = self._process_message_files(messages)
formatted_messages = self._format_messages_for_provider(messages)
# --- 2) Prepare the parameters for the completion call
@@ -1799,6 +1812,9 @@ class LLM(BaseLLM):
if isinstance(messages, str):
messages = [{"role": "user", "content": messages}]
# Process file attachments asynchronously before preparing params
messages = await self._aprocess_message_files(messages)
if "o1" in self.model.lower():
for message in messages:
if message.get("role") == "system":
@@ -1809,7 +1825,9 @@ class LLM(BaseLLM):
if callbacks and len(callbacks) > 0:
self.set_callbacks(callbacks)
try:
params = self._prepare_completion_params(messages, tools)
params = self._prepare_completion_params(
messages, tools, skip_file_processing=True
)
if self.stream:
return await self._ahandle_streaming_response(
@@ -1896,6 +1914,88 @@ class LLM(BaseLLM):
),
)
def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]:
"""Process files attached to messages and format for provider.
For each message with a `files` field, formats the files into
provider-specific content blocks and updates the message content.
Args:
messages: List of messages that may contain file attachments.
Returns:
Messages with files formatted into content blocks.
"""
if not HAS_CREWAI_FILES or not self.supports_multimodal():
return messages
provider = getattr(self, "provider", None) or self.model
for msg in messages:
files = msg.get("files")
if not files:
continue
content_blocks = format_multimodal_content(files, provider)
if not content_blocks:
msg.pop("files", None)
continue
existing_content = msg.get("content", "")
if isinstance(existing_content, str):
msg["content"] = [
self.format_text_content(existing_content),
*content_blocks,
]
elif isinstance(existing_content, list):
msg["content"] = [*existing_content, *content_blocks]
msg.pop("files", None)
return messages
async def _aprocess_message_files(
self, messages: list[LLMMessage]
) -> list[LLMMessage]:
"""Async process files attached to messages and format for provider.
For each message with a `files` field, formats the files into
provider-specific content blocks and updates the message content.
Args:
messages: List of messages that may contain file attachments.
Returns:
Messages with files formatted into content blocks.
"""
if not HAS_CREWAI_FILES or not self.supports_multimodal():
return messages
provider = getattr(self, "provider", None) or self.model
for msg in messages:
files = msg.get("files")
if not files:
continue
content_blocks = await aformat_multimodal_content(files, provider)
if not content_blocks:
msg.pop("files", None)
continue
existing_content = msg.get("content", "")
if isinstance(existing_content, str):
msg["content"] = [
self.format_text_content(existing_content),
*content_blocks,
]
elif isinstance(existing_content, list):
msg["content"] = [*existing_content, *content_blocks]
msg.pop("files", None)
return messages
def _format_messages_for_provider(
self, messages: list[LLMMessage]
) -> list[dict[str, str]]:
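
Roughly, for an OpenAI-style provider the transformation looks like this
(values illustrative):

    # Before _process_message_files:
    {"role": "user", "content": "Describe the chart.", "files": {"chart": image_file}}

    # After: the text is wrapped via format_text_content, file content blocks
    # are appended, and the `files` key is dropped.
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe the chart."},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
        ],
    }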

View File

@@ -19,12 +19,6 @@ from typing import (
import uuid
import warnings
from crewai_files import (
FileInput,
FilePath,
FileSourceInput,
normalize_input_files,
)
from pydantic import (
UUID4,
BaseModel,
@@ -55,6 +49,17 @@ from crewai.utilities.file_store import (
get_all_files,
store_task_files,
)
try:
from crewai_files import (
FilePath,
normalize_input_files,
)
HAS_CREWAI_FILES = True
except ImportError:
HAS_CREWAI_FILES = False
from crewai.utilities.guardrail import (
process_guardrail,
)
@@ -153,7 +158,7 @@ class Task(BaseModel):
default_factory=list,
description="Tools the agent is limited to use for this task.",
)
input_files: list[FileSourceInput | FileInput] = Field(
input_files: list[Any] = Field(
default_factory=list,
description="List of input files for this task. Accepts paths, bytes, or File objects.",
)
@@ -379,6 +384,9 @@ class Task(BaseModel):
if not v:
return v
if not HAS_CREWAI_FILES:
return v
result = []
for item in v:
if isinstance(item, str):
@@ -1034,7 +1042,7 @@ Follow these guidelines:
Converts input_files list to a named dict and stores under task ID.
"""
if not self.input_files:
if not HAS_CREWAI_FILES or not self.input_files:
return
files_dict = normalize_input_files(self.input_files)
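
With crewai_files installed, task usage is unchanged; a hypothetical example
(the path and field values are illustrative, input_files is per this diff):

    task = Task(
        description="Summarize the attached report",
        expected_output="A one-paragraph summary",
        input_files=["./report.pdf"],  # paths, bytes, or File objects
    )

    # Without crewai_files, input_files is left untouched (HAS_CREWAI_FILES is
    # False) and no normalization or storage is attempted.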

View File

@@ -1,11 +1,16 @@
"""Types for CrewAI utilities."""
from typing import Any, Literal
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Literal
from crewai_files import FileInput
from typing_extensions import NotRequired, TypedDict
if TYPE_CHECKING:
from crewai_files import FileInput
class LLMMessage(TypedDict):
"""Type for formatted LLM messages.
@@ -19,6 +24,7 @@ class LLMMessage(TypedDict):
tool_call_id: NotRequired[str]
name: NotRequired[str]
tool_calls: NotRequired[list[dict[str, Any]]]
files: NotRequired[dict[str, FileInput]]
class KickoffInputs(TypedDict, total=False):
@@ -28,4 +34,4 @@ class KickoffInputs(TypedDict, total=False):
files: Named file inputs accessible to tasks during execution.
"""
files: dict[str, FileInput]
files: dict[str, FileInput]

View File

@@ -1,104 +0,0 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What
type of document is this? Answer in one word."},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stream":false}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '748'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.71.1
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAA/3WQTUvEMBCG/8ucW2jr7rL25sKCKHrQiyASYjJsw6ZJzUxEKf3vTheLX3hKeJ8n
8zIZoY8WPbRgvM4Wy7NyXXbaHXPZVM2qrpoGCnBWhJ4Oqqovd/nBnt92tF1dX+z3u6t7ffO8FYff
B5wtJNIHlCBFPweayBHrwBKZGBjl1j6Oi8/4NpPT0cIdUu4RpqcCiOOgEmqKQQAGqzinAJ+A8CVj
MDIhZO8LyKfSdgQXhsyK4xEDQVtvmo3UatOhMjKMXQzqp1ItXLD9jy1v5wYcOuwxaa/W/V//i9bd
bzoVEDN/j1ayDqZXZ1CxwySLzl9ldbIwTR/rySkqnAEAAA==
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 22 Jan 2026 00:18:50 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-01-22T00:18:50Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '750'
status:
code: 200
message: OK
version: 1

File diff suppressed because one or more lines are too long

View File

@@ -1,104 +0,0 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What
type of document is this? Answer in one word."},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stream":false}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '748'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.71.1
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAA/3WQTUvEMBCG/8ucW2hju4eeRUU97EFRFAkhGbZh06Qmk1Up/e9OF4tf7CnhfZ7J
y2SCIRh00IF2Khssz8q27JXd51JUoqkrIaAAa1gY0k5W9bbptXo7PD60l/V1f/V0J+5vxQ079DHi
YmFKaoccxOCWQKVkEylPHOngCfnWPU+rT/i+kOPRwfb8AuaXAhKFUUZUKXhO0RtJOXr4AglfM3rN
4z47V0A+NnYTWD9mkhT26BN09UZsuFPpHqXmx8gGL38r1coZm1NsnV0acOxxwKicbIf//jet+790
LiBk+hk1vA7Gg9UoyWLkRZd/MioamOdP24g1JZkBAAA=
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 22 Jan 2026 00:18:56 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-01-22T00:18:55Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '648'
status:
code: 200
message: OK
version: 1

File diff suppressed because one or more lines are too long

View File

@@ -7,7 +7,7 @@ from unittest.mock import patch
import pytest
from crewai.llm import LLM
from crewai.files import ImageFile, PDFFile, TextFile
from crewai_files import ImageFile, PDFFile, TextFile, format_multimodal_content
# Check for optional provider dependencies
try:
@@ -124,27 +124,18 @@ class TestLiteLLMMultimodal:
llm = LLM(model="gpt-4o", is_litellm=True)
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
assert "data:image/png;base64," in result[0]["image_url"]["url"]
def test_format_multimodal_content_non_multimodal(self) -> None:
"""Test non-multimodal model returns empty list."""
llm = LLM(model="gpt-3.5-turbo", is_litellm=True)
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
assert result == []
def test_format_multimodal_content_unsupported_type(self) -> None:
"""Test unsupported content type is skipped."""
llm = LLM(model="gpt-4o", is_litellm=True) # OpenAI doesn't support PDF
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert result == []
@@ -175,7 +166,7 @@ class TestAnthropicMultimodal:
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image"
@@ -188,7 +179,7 @@ class TestAnthropicMultimodal:
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "document"
@@ -230,7 +221,7 @@ class TestOpenAIMultimodal:
llm = LLM(model="openai/gpt-4o")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -264,7 +255,7 @@ class TestGeminiMultimodal:
llm = LLM(model="gemini/gemini-pro")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "inlineData" in result[0]
@@ -321,7 +312,7 @@ class TestAzureMultimodal:
llm = LLM(model="azure/gpt-4o")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -357,7 +348,7 @@ class TestBedrockMultimodal:
"""Test Bedrock supports images and PDFs."""
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
types = llm.supported_multimodal_content_types()
assert "image/" in types
assert any(t.startswith("image/") for t in types)
assert "application/pdf" in types
def test_format_multimodal_content_image(self) -> None:
@@ -365,7 +356,7 @@ class TestBedrockMultimodal:
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "image" in result[0]
@@ -378,7 +369,7 @@ class TestBedrockMultimodal:
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "document" in result[0]
@@ -411,18 +402,6 @@ class TestBaseLLMMultimodal:
llm = TestLLM(model="test")
assert llm.supported_multimodal_content_types() == []
def test_base_format_multimodal_content_empty(self) -> None:
"""Test base implementation returns empty list."""
from crewai.llms.base_llm import BaseLLM
class TestLLM(BaseLLM):
def call(self, messages, tools=None, callbacks=None):
return "test"
llm = TestLLM(model="test")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
assert llm.format_multimodal_content(files) == []
def test_base_format_text_content(self) -> None:
"""Test base text formatting uses OpenAI/Anthropic style."""
from crewai.llms.base_llm import BaseLLM
@@ -447,7 +426,7 @@ class TestMultipleFilesFormatting:
"chart2": ImageFile(source=MINIMAL_PNG),
}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 2
@@ -460,7 +439,7 @@ class TestMultipleFilesFormatting:
"text": TextFile(source=b"hello"), # Not supported
}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -469,6 +448,6 @@ class TestMultipleFilesFormatting:
"""Test empty files dict returns empty list."""
llm = LLM(model="gpt-4o")
result = llm.format_multimodal_content({})
result = format_multimodal_content({}, llm.model)
assert result == []

View File

@@ -9,13 +9,13 @@ from pathlib import Path
import pytest
from crewai.llm import LLM
from crewai.files import File, ImageFile, PDFFile, TextFile
from crewai_files import File, ImageFile, PDFFile, TextFile, format_multimodal_content
# Path to test data files
TEST_DATA_DIR = Path(__file__).parent.parent.parent.parent.parent / "data"
TEST_IMAGE_PATH = TEST_DATA_DIR / "revenue_chart.png"
TEST_TEXT_PATH = TEST_DATA_DIR / "review_guidelines.txt"
TEST_FIXTURES_DIR = Path(__file__).parent.parent.parent.parent / "crewai-files" / "tests" / "fixtures"
TEST_IMAGE_PATH = TEST_FIXTURES_DIR / "revenue_chart.png"
TEST_TEXT_PATH = TEST_FIXTURES_DIR / "review_guidelines.txt"
@pytest.fixture
@@ -50,7 +50,8 @@ startxref
def _build_multimodal_message(llm: LLM, prompt: str, files: dict) -> list[dict]:
"""Build a multimodal message with text and file content."""
content_blocks = llm.format_multimodal_content(files)
provider = getattr(llm, "provider", None) or llm.model
content_blocks = format_multimodal_content(files, provider)
return [
{
"role": "user",
@@ -124,6 +125,68 @@ class TestAnthropicMultimodalIntegration:
assert len(response) > 0
class TestAzureMultimodalIntegration:
"""Integration tests for Azure OpenAI multimodal with real API calls."""
@pytest.mark.vcr()
def test_describe_image(self, test_image_bytes: bytes) -> None:
"""Test Azure OpenAI can describe an image."""
llm = LLM(model="azure/gpt-4o")
files = {"image": ImageFile(source=test_image_bytes)}
messages = _build_multimodal_message(
llm,
"Describe this image in one sentence. Be brief.",
files,
)
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
class TestBedrockMultimodalIntegration:
"""Integration tests for AWS Bedrock multimodal with real API calls."""
@pytest.mark.vcr()
def test_describe_image(self, test_image_bytes: bytes) -> None:
"""Test Bedrock Claude can describe an image."""
llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
files = {"image": ImageFile(source=test_image_bytes)}
messages = _build_multimodal_message(
llm,
"Describe this image in one sentence. Be brief.",
files,
)
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
@pytest.mark.vcr()
def test_analyze_pdf(self) -> None:
"""Test Bedrock Claude can analyze a PDF."""
llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
files = {"document": PDFFile(source=MINIMAL_PDF)}
messages = _build_multimodal_message(
llm,
"What type of document is this? Answer in one word.",
files,
)
response = llm.call(messages)
assert response
assert isinstance(response, str)
assert len(response) > 0
class TestGeminiMultimodalIntegration:
"""Integration tests for Gemini multimodal with real API calls."""

View File

@@ -5,7 +5,7 @@ import base64
import pytest
from crewai.tools.agent_tools.read_file_tool import ReadFileTool
from crewai.files import ImageFile, PDFFile, TextFile
from crewai_files import ImageFile, PDFFile, TextFile
class TestReadFileTool:

View File

@@ -13,7 +13,7 @@ from crewai.utilities.file_store import (
store_files,
store_task_files,
)
from crewai.files import TextFile
from crewai_files import TextFile
class TestFileStore:

View File

@@ -6,7 +6,7 @@ from pathlib import Path
import pytest
from crewai.files import (
from crewai_files import (
AudioFile,
File,
FileBytes,
@@ -20,7 +20,7 @@ from crewai.files import (
normalize_input_files,
wrap_file_source,
)
from crewai.files.file import detect_content_type
from crewai_files.core.sources import detect_content_type
class TestDetectContentType: