mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-23 07:08:14 +00:00
refactor: improve multimodal file handling architecture
- Make crewai_files an optional dependency with graceful fallbacks
- Move file formatting from executor to LLM layer (_process_message_files)
- Add files field to LLMMessage type for cleaner message passing
- Add cache_control to Anthropic content blocks for prompt caching
- Clean up formatters: static methods for OpenAI/Gemini, proper error handling
- Remove unused ContentFormatter protocol
- Move test fixtures to lib/crewai-files/tests/fixtures
- Add Azure and Bedrock multimodal integration tests
- Fix mypy errors in crew_agent_executor.py
This commit is contained in:
@@ -8,7 +8,8 @@ from typing import Any
|
||||
from crewai_files.core.resolved import (
|
||||
FileReference,
|
||||
InlineBase64,
|
||||
ResolvedFile,
|
||||
InlineBytes,
|
||||
ResolvedFileType,
|
||||
UrlReference,
|
||||
)
|
||||
from crewai_files.core.types import FileInput
|
||||
@@ -20,7 +21,7 @@ class AnthropicFormatter:
|
||||
def format_block(
|
||||
self,
|
||||
file: FileInput,
|
||||
resolved: ResolvedFile,
|
||||
resolved: ResolvedFileType,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Format a resolved file into an Anthropic content block.
|
||||
|
||||
@@ -43,6 +44,7 @@ class AnthropicFormatter:
|
||||
"type": "file",
|
||||
"file_id": resolved.file_id,
|
||||
},
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
|
||||
if isinstance(resolved, UrlReference):
|
||||
@@ -52,6 +54,7 @@ class AnthropicFormatter:
|
||||
"type": "url",
|
||||
"url": resolved.url,
|
||||
},
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
|
||||
if isinstance(resolved, InlineBase64):
|
||||
@@ -62,17 +65,21 @@ class AnthropicFormatter:
|
||||
"media_type": resolved.content_type,
|
||||
"data": resolved.data,
|
||||
},
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
|
||||
data = base64.b64encode(file.read()).decode("ascii")
|
||||
return {
|
||||
"type": block_type,
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": content_type,
|
||||
"data": data,
|
||||
},
|
||||
}
|
||||
if isinstance(resolved, InlineBytes):
|
||||
return {
|
||||
"type": block_type,
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": resolved.content_type,
|
||||
"data": base64.b64encode(resolved.data).decode("ascii"),
|
||||
},
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
|
||||
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
|
||||
|
||||
@staticmethod
|
||||
def _get_block_type(content_type: str) -> str | None:
|
||||
|
||||
@@ -274,4 +274,8 @@ def _format_block(
|
||||
"""
|
||||
if isinstance(formatter, BedrockFormatter):
|
||||
return formatter.format_block(file_input, resolved, name=name)
|
||||
return formatter.format_block(file_input, resolved)
|
||||
if isinstance(formatter, AnthropicFormatter):
|
||||
return formatter.format_block(file_input, resolved)
|
||||
if isinstance(formatter, (OpenAIFormatter, GeminiFormatter)):
|
||||
return formatter.format_block(resolved)
|
||||
raise TypeError(f"Unknown formatter type: {type(formatter).__name__}")
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
"""Base formatter protocol for provider-specific content blocks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Protocol
|
||||
|
||||
from crewai_files.core.resolved import ResolvedFile
|
||||
from crewai_files.core.types import FileInput
|
||||
|
||||
|
||||
class ContentFormatter(Protocol):
    """Structural interface for provider-specific file formatters.

    Any object exposing a compatible ``format_block`` method satisfies this
    protocol; no inheritance is required.
    """

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Build the provider content block for one resolved file.

        Args:
            file: The original file input, including its metadata.
            resolved: The resolved form of that file (for example a
                FileReference or InlineBase64).

        Returns:
            A content block dictionary, or None when the file type is not
            supported by the provider.
        """
        ...
|
||||
@@ -2,12 +2,15 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from typing import Any
|
||||
|
||||
from crewai_files.core.resolved import (
|
||||
FileReference,
|
||||
InlineBase64,
|
||||
InlineBytes,
|
||||
ResolvedFile,
|
||||
ResolvedFileType,
|
||||
UrlReference,
|
||||
)
|
||||
from crewai_files.core.types import FileInput
|
||||
|
||||
@@ -49,7 +52,7 @@ class BedrockFormatter:
|
||||
def format_block(
|
||||
self,
|
||||
file: FileInput,
|
||||
resolved: ResolvedFile,
|
||||
resolved: ResolvedFileType,
|
||||
name: str | None = None,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Format a resolved file into a Bedrock content block.
|
||||
@@ -64,15 +67,24 @@ class BedrockFormatter:
|
||||
"""
|
||||
content_type = file.content_type
|
||||
|
||||
if isinstance(resolved, FileReference) and resolved.file_uri:
|
||||
if isinstance(resolved, FileReference):
|
||||
if not resolved.file_uri:
|
||||
raise ValueError("Bedrock requires file_uri for FileReference (S3 URI)")
|
||||
return self._format_s3_block(content_type, resolved.file_uri, name)
|
||||
|
||||
if isinstance(resolved, InlineBytes):
|
||||
file_bytes = resolved.data
|
||||
else:
|
||||
file_bytes = file.read()
|
||||
return self._format_bytes_block(content_type, resolved.data, name)
|
||||
|
||||
return self._format_bytes_block(content_type, file_bytes, name)
|
||||
if isinstance(resolved, InlineBase64):
|
||||
file_bytes = base64.b64decode(resolved.data)
|
||||
return self._format_bytes_block(content_type, file_bytes, name)
|
||||
|
||||
if isinstance(resolved, UrlReference):
|
||||
raise ValueError(
|
||||
"Bedrock does not support URL references - resolve to bytes first"
|
||||
)
|
||||
|
||||
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
|
||||
|
||||
def _format_s3_block(
|
||||
self,
|
||||
|
||||
@@ -8,32 +8,31 @@ from typing import Any
|
||||
from crewai_files.core.resolved import (
|
||||
FileReference,
|
||||
InlineBase64,
|
||||
ResolvedFile,
|
||||
InlineBytes,
|
||||
ResolvedFileType,
|
||||
UrlReference,
|
||||
)
|
||||
from crewai_files.core.types import FileInput
|
||||
|
||||
|
||||
class GeminiFormatter:
|
||||
"""Formats resolved files into Gemini content blocks."""
|
||||
|
||||
def format_block(
|
||||
self,
|
||||
file: FileInput,
|
||||
resolved: ResolvedFile,
|
||||
) -> dict[str, Any] | None:
|
||||
@staticmethod
|
||||
def format_block(resolved: ResolvedFileType) -> dict[str, Any]:
|
||||
"""Format a resolved file into a Gemini content block.
|
||||
|
||||
Args:
|
||||
file: Original file input with metadata.
|
||||
resolved: Resolved file.
|
||||
|
||||
Returns:
|
||||
Content block dict or None if not supported.
|
||||
"""
|
||||
content_type = file.content_type
|
||||
Content block dict.
|
||||
|
||||
if isinstance(resolved, FileReference) and resolved.file_uri:
|
||||
Raises:
|
||||
TypeError: If resolved type is not supported.
|
||||
"""
|
||||
if isinstance(resolved, FileReference):
|
||||
if not resolved.file_uri:
|
||||
raise ValueError("Gemini requires file_uri for FileReference")
|
||||
return {
|
||||
"fileData": {
|
||||
"mimeType": resolved.content_type,
|
||||
@@ -44,7 +43,7 @@ class GeminiFormatter:
|
||||
if isinstance(resolved, UrlReference):
|
||||
return {
|
||||
"fileData": {
|
||||
"mimeType": content_type,
|
||||
"mimeType": resolved.content_type,
|
||||
"fileUri": resolved.url,
|
||||
}
|
||||
}
|
||||
@@ -57,10 +56,12 @@ class GeminiFormatter:
|
||||
}
|
||||
}
|
||||
|
||||
data = base64.b64encode(file.read()).decode("ascii")
|
||||
return {
|
||||
"inlineData": {
|
||||
"mimeType": content_type,
|
||||
"data": data,
|
||||
if isinstance(resolved, InlineBytes):
|
||||
return {
|
||||
"inlineData": {
|
||||
"mimeType": resolved.content_type,
|
||||
"data": base64.b64encode(resolved.data).decode("ascii"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
|
||||
|
||||
@@ -8,31 +8,28 @@ from typing import Any
|
||||
from crewai_files.core.resolved import (
|
||||
FileReference,
|
||||
InlineBase64,
|
||||
ResolvedFile,
|
||||
InlineBytes,
|
||||
ResolvedFileType,
|
||||
UrlReference,
|
||||
)
|
||||
from crewai_files.core.types import FileInput
|
||||
|
||||
|
||||
class OpenAIFormatter:
|
||||
"""Formats resolved files into OpenAI content blocks."""
|
||||
|
||||
def format_block(
|
||||
self,
|
||||
file: FileInput,
|
||||
resolved: ResolvedFile,
|
||||
) -> dict[str, Any] | None:
|
||||
@staticmethod
|
||||
def format_block(resolved: ResolvedFileType) -> dict[str, Any]:
|
||||
"""Format a resolved file into an OpenAI content block.
|
||||
|
||||
Args:
|
||||
file: Original file input with metadata.
|
||||
resolved: Resolved file.
|
||||
|
||||
Returns:
|
||||
Content block dict or None if not supported.
|
||||
"""
|
||||
content_type = file.content_type
|
||||
Content block dict.
|
||||
|
||||
Raises:
|
||||
TypeError: If resolved type is not supported.
|
||||
"""
|
||||
if isinstance(resolved, FileReference):
|
||||
return {
|
||||
"type": "file",
|
||||
@@ -53,8 +50,11 @@ class OpenAIFormatter:
|
||||
},
|
||||
}
|
||||
|
||||
data = base64.b64encode(file.read()).decode("ascii")
|
||||
return {
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:{content_type};base64,{data}"},
|
||||
}
|
||||
if isinstance(resolved, InlineBytes):
|
||||
data = base64.b64encode(resolved.data).decode("ascii")
|
||||
return {
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:{resolved.content_type};base64,{data}"},
|
||||
}
|
||||
|
||||
raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
|
||||
|
||||
|
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 27 KiB |
@@ -3,15 +3,6 @@ from typing import Any
|
||||
import urllib.request
|
||||
import warnings
|
||||
|
||||
from crewai_files import (
|
||||
AudioFile,
|
||||
File,
|
||||
ImageFile,
|
||||
PDFFile,
|
||||
TextFile,
|
||||
VideoFile,
|
||||
)
|
||||
|
||||
from crewai.agent.core import Agent
|
||||
from crewai.crew import Crew
|
||||
from crewai.crews.crew_output import CrewOutput
|
||||
@@ -83,20 +74,14 @@ _track_install_async()
|
||||
__all__ = [
|
||||
"LLM",
|
||||
"Agent",
|
||||
"AudioFile",
|
||||
"BaseLLM",
|
||||
"Crew",
|
||||
"CrewOutput",
|
||||
"File",
|
||||
"Flow",
|
||||
"ImageFile",
|
||||
"Knowledge",
|
||||
"LLMGuardrail",
|
||||
"PDFFile",
|
||||
"Process",
|
||||
"Task",
|
||||
"TaskOutput",
|
||||
"TextFile",
|
||||
"VideoFile",
|
||||
"__version__",
|
||||
]
|
||||
|
||||
@@ -10,7 +10,6 @@ from collections.abc import Callable
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any, Literal, cast
|
||||
|
||||
from crewai_files import aformat_multimodal_content, format_multimodal_content
|
||||
from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
|
||||
from pydantic_core import CoreSchema, core_schema
|
||||
|
||||
@@ -46,7 +45,7 @@ from crewai.utilities.agent_utils import (
|
||||
track_delegation_if_needed,
|
||||
)
|
||||
from crewai.utilities.constants import TRAINING_DATA_FILE
|
||||
from crewai.utilities.file_store import get_all_files
|
||||
from crewai.utilities.file_store import aget_all_files, get_all_files
|
||||
from crewai.utilities.i18n import I18N, get_i18n
|
||||
from crewai.utilities.printer import Printer
|
||||
from crewai.utilities.string_utils import sanitize_tool_name
|
||||
@@ -220,71 +219,41 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
return {"output": formatted_answer.output}
|
||||
|
||||
def _inject_multimodal_files(self) -> None:
|
||||
"""Inject files as multimodal content into messages.
|
||||
"""Attach files to the last user message for LLM-layer formatting.
|
||||
|
||||
For crews with input files and LLMs that support multimodal,
|
||||
uses crewai_files to process, resolve, and format files into
|
||||
provider-specific content blocks.
|
||||
Retrieves crew and task files and attaches them to the message's
|
||||
`files` field. The LLM layer handles provider-specific formatting.
|
||||
"""
|
||||
if not self.crew or not self.task:
|
||||
return
|
||||
|
||||
if not self.llm.supports_multimodal():
|
||||
return
|
||||
|
||||
files = get_all_files(self.crew.id, self.task.id)
|
||||
if not files:
|
||||
return
|
||||
|
||||
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
||||
content_blocks = format_multimodal_content(files, provider)
|
||||
|
||||
if not content_blocks:
|
||||
return
|
||||
|
||||
for i in range(len(self.messages) - 1, -1, -1):
|
||||
msg = self.messages[i]
|
||||
if msg.get("role") == "user":
|
||||
existing_content = msg.get("content", "")
|
||||
if isinstance(existing_content, str):
|
||||
msg["content"] = [
|
||||
self.llm.format_text_content(existing_content),
|
||||
*content_blocks,
|
||||
]
|
||||
msg["files"] = files
|
||||
break
|
||||
|
||||
async def _ainject_multimodal_files(self) -> None:
|
||||
"""Async inject files as multimodal content into messages.
|
||||
"""Async attach files to the last user message for LLM-layer formatting.
|
||||
|
||||
For crews with input files and LLMs that support multimodal,
|
||||
uses crewai_files to process, resolve, and format files into
|
||||
provider-specific content blocks with parallel file resolution.
|
||||
Retrieves crew and task files and attaches them to the message's
|
||||
`files` field. The LLM layer handles provider-specific formatting.
|
||||
"""
|
||||
if not self.crew or not self.task:
|
||||
return
|
||||
|
||||
if not self.llm.supports_multimodal():
|
||||
return
|
||||
|
||||
files = get_all_files(self.crew.id, self.task.id)
|
||||
files = await aget_all_files(self.crew.id, self.task.id)
|
||||
if not files:
|
||||
return
|
||||
|
||||
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
||||
content_blocks = await aformat_multimodal_content(files, provider)
|
||||
|
||||
if not content_blocks:
|
||||
return
|
||||
|
||||
for i in range(len(self.messages) - 1, -1, -1):
|
||||
msg = self.messages[i]
|
||||
if msg.get("role") == "user":
|
||||
existing_content = msg.get("content", "")
|
||||
if isinstance(existing_content, str):
|
||||
msg["content"] = [
|
||||
self.llm.format_text_content(existing_content),
|
||||
*content_blocks,
|
||||
]
|
||||
msg["files"] = files
|
||||
break
|
||||
|
||||
def _invoke_loop(self) -> AgentFinish:
|
||||
@@ -772,7 +741,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
if (
|
||||
original_tool
|
||||
and hasattr(original_tool, "cache_function")
|
||||
and original_tool.cache_function
|
||||
and callable(original_tool.cache_function)
|
||||
):
|
||||
should_cache = original_tool.cache_function(
|
||||
args_dict, raw_result
|
||||
@@ -803,7 +772,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
error=e,
|
||||
),
|
||||
)
|
||||
elif max_usage_reached:
|
||||
elif max_usage_reached and original_tool:
|
||||
# Return error message when max usage limit is reached
|
||||
result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."
|
||||
|
||||
|
||||
@@ -6,14 +6,6 @@ import asyncio
|
||||
from collections.abc import Callable, Coroutine, Iterable
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from crewai_files import (
|
||||
AudioFile,
|
||||
ImageFile,
|
||||
PDFFile,
|
||||
TextFile,
|
||||
VideoFile,
|
||||
)
|
||||
|
||||
from crewai.agents.agent_builder.base_agent import BaseAgent
|
||||
from crewai.crews.crew_output import CrewOutput
|
||||
from crewai.rag.embeddings.types import EmbedderConfig
|
||||
@@ -27,6 +19,20 @@ from crewai.utilities.streaming import (
|
||||
from crewai.utilities.types import KickoffInputs
|
||||
|
||||
|
||||
try:
|
||||
from crewai_files import (
|
||||
AudioFile,
|
||||
ImageFile,
|
||||
PDFFile,
|
||||
TextFile,
|
||||
VideoFile,
|
||||
)
|
||||
|
||||
_FILE_TYPES: tuple[type, ...] = (AudioFile, ImageFile, PDFFile, TextFile, VideoFile)
|
||||
except ImportError:
|
||||
_FILE_TYPES = ()
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai.crew import Crew
|
||||
|
||||
@@ -198,12 +204,14 @@ def _extract_files_from_inputs(inputs: dict[str, Any]) -> dict[str, Any]:
|
||||
Returns:
|
||||
Dictionary of extracted file objects.
|
||||
"""
|
||||
file_types = (AudioFile, ImageFile, PDFFile, TextFile, VideoFile)
|
||||
if not _FILE_TYPES:
|
||||
return {}
|
||||
|
||||
files: dict[str, Any] = {}
|
||||
keys_to_remove: list[str] = []
|
||||
|
||||
for key, value in inputs.items():
|
||||
if isinstance(value, file_types):
|
||||
if isinstance(value, _FILE_TYPES):
|
||||
files[key] = value
|
||||
keys_to_remove.append(key)
|
||||
|
||||
|
||||
@@ -53,6 +53,14 @@ from crewai.utilities.logger_utils import suppress_warnings
|
||||
from crewai.utilities.string_utils import sanitize_tool_name
|
||||
|
||||
|
||||
try:
|
||||
from crewai_files import aformat_multimodal_content, format_multimodal_content
|
||||
|
||||
HAS_CREWAI_FILES = True
|
||||
except ImportError:
|
||||
HAS_CREWAI_FILES = False
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.exceptions import ContextWindowExceededError
|
||||
from litellm.litellm_core_utils.get_supported_openai_params import (
|
||||
@@ -661,12 +669,14 @@ class LLM(BaseLLM):
|
||||
self,
|
||||
messages: str | list[LLMMessage],
|
||||
tools: list[dict[str, BaseTool]] | None = None,
|
||||
skip_file_processing: bool = False,
|
||||
) -> dict[str, Any]:
|
||||
"""Prepare parameters for the completion call.
|
||||
|
||||
Args:
|
||||
messages: Input messages for the LLM
|
||||
tools: Optional list of tool schemas
|
||||
skip_file_processing: Skip file processing (used when already done async)
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Parameters for the completion call
|
||||
@@ -674,6 +684,9 @@ class LLM(BaseLLM):
|
||||
# --- 1) Format messages according to provider requirements
|
||||
if isinstance(messages, str):
|
||||
messages = [{"role": "user", "content": messages}]
|
||||
# --- 1a) Process any file attachments into multimodal content
|
||||
if not skip_file_processing:
|
||||
messages = self._process_message_files(messages)
|
||||
formatted_messages = self._format_messages_for_provider(messages)
|
||||
|
||||
# --- 2) Prepare the parameters for the completion call
|
||||
@@ -1799,6 +1812,9 @@ class LLM(BaseLLM):
|
||||
if isinstance(messages, str):
|
||||
messages = [{"role": "user", "content": messages}]
|
||||
|
||||
# Process file attachments asynchronously before preparing params
|
||||
messages = await self._aprocess_message_files(messages)
|
||||
|
||||
if "o1" in self.model.lower():
|
||||
for message in messages:
|
||||
if message.get("role") == "system":
|
||||
@@ -1809,7 +1825,9 @@ class LLM(BaseLLM):
|
||||
if callbacks and len(callbacks) > 0:
|
||||
self.set_callbacks(callbacks)
|
||||
try:
|
||||
params = self._prepare_completion_params(messages, tools)
|
||||
params = self._prepare_completion_params(
|
||||
messages, tools, skip_file_processing=True
|
||||
)
|
||||
|
||||
if self.stream:
|
||||
return await self._ahandle_streaming_response(
|
||||
@@ -1896,6 +1914,88 @@ class LLM(BaseLLM):
|
||||
),
|
||||
)
|
||||
|
||||
def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]:
    """Turn each message's attached ``files`` into multimodal content blocks.

    Every message carrying a non-empty ``files`` entry has those files passed
    to ``format_multimodal_content`` together with this LLM's provider (or
    model name when no provider attribute is set). The resulting blocks are
    merged into the message's ``content`` and the ``files`` key is removed.

    Messages are modified in place; the input list itself is returned.

    Args:
        messages: Messages that may contain a ``files`` entry.

    Returns:
        The same ``messages`` list. It is returned untouched when
        ``crewai_files`` is not installed or ``supports_multimodal()`` is
        false.
    """
    if not HAS_CREWAI_FILES or not self.supports_multimodal():
        return messages

    provider = getattr(self, "provider", None) or self.model

    for msg in messages:
        files = msg.get("files")
        if not files:
            continue

        content_blocks = format_multimodal_content(files, provider)

        # The attachment has been consumed (or produced nothing usable);
        # either way the raw ``files`` entry must not stay on the message.
        msg.pop("files", None)
        if not content_blocks:
            continue

        existing_content = msg.get("content", "")
        if existing_content is None:
            # A message with files but no text previously lost its
            # attachments silently; send the file blocks on their own.
            msg["content"] = [*content_blocks]
        elif isinstance(existing_content, str):
            # format_text_content is defined elsewhere on the class;
            # presumably it wraps the text as a provider text block.
            msg["content"] = [
                self.format_text_content(existing_content),
                *content_blocks,
            ]
        elif isinstance(existing_content, list):
            msg["content"] = [*existing_content, *content_blocks]

    return messages
|
||||
|
||||
async def _aprocess_message_files(
    self, messages: list[LLMMessage]
) -> list[LLMMessage]:
    """Async variant: turn attached ``files`` into multimodal content blocks.

    Every message carrying a non-empty ``files`` entry has those files
    awaited through ``aformat_multimodal_content`` together with this LLM's
    provider (or model name when no provider attribute is set). The resulting
    blocks are merged into the message's ``content`` and the ``files`` key is
    removed.

    Messages are modified in place; the input list itself is returned.

    Args:
        messages: Messages that may contain a ``files`` entry.

    Returns:
        The same ``messages`` list. It is returned untouched when
        ``crewai_files`` is not installed or ``supports_multimodal()`` is
        false.
    """
    if not HAS_CREWAI_FILES or not self.supports_multimodal():
        return messages

    provider = getattr(self, "provider", None) or self.model

    for msg in messages:
        files = msg.get("files")
        if not files:
            continue

        content_blocks = await aformat_multimodal_content(files, provider)

        # The attachment has been consumed (or produced nothing usable);
        # either way the raw ``files`` entry must not stay on the message.
        msg.pop("files", None)
        if not content_blocks:
            continue

        existing_content = msg.get("content", "")
        if existing_content is None:
            # A message with files but no text previously lost its
            # attachments silently; send the file blocks on their own.
            msg["content"] = [*content_blocks]
        elif isinstance(existing_content, str):
            # format_text_content is defined elsewhere on the class;
            # presumably it wraps the text as a provider text block.
            msg["content"] = [
                self.format_text_content(existing_content),
                *content_blocks,
            ]
        elif isinstance(existing_content, list):
            msg["content"] = [*existing_content, *content_blocks]

    return messages
|
||||
|
||||
def _format_messages_for_provider(
|
||||
self, messages: list[LLMMessage]
|
||||
) -> list[dict[str, str]]:
|
||||
|
||||
@@ -19,12 +19,6 @@ from typing import (
|
||||
import uuid
|
||||
import warnings
|
||||
|
||||
from crewai_files import (
|
||||
FileInput,
|
||||
FilePath,
|
||||
FileSourceInput,
|
||||
normalize_input_files,
|
||||
)
|
||||
from pydantic import (
|
||||
UUID4,
|
||||
BaseModel,
|
||||
@@ -55,6 +49,17 @@ from crewai.utilities.file_store import (
|
||||
get_all_files,
|
||||
store_task_files,
|
||||
)
|
||||
|
||||
|
||||
try:
|
||||
from crewai_files import (
|
||||
FilePath,
|
||||
normalize_input_files,
|
||||
)
|
||||
|
||||
HAS_CREWAI_FILES = True
|
||||
except ImportError:
|
||||
HAS_CREWAI_FILES = False
|
||||
from crewai.utilities.guardrail import (
|
||||
process_guardrail,
|
||||
)
|
||||
@@ -153,7 +158,7 @@ class Task(BaseModel):
|
||||
default_factory=list,
|
||||
description="Tools the agent is limited to use for this task.",
|
||||
)
|
||||
input_files: list[FileSourceInput | FileInput] = Field(
|
||||
input_files: list[Any] = Field(
|
||||
default_factory=list,
|
||||
description="List of input files for this task. Accepts paths, bytes, or File objects.",
|
||||
)
|
||||
@@ -379,6 +384,9 @@ class Task(BaseModel):
|
||||
if not v:
|
||||
return v
|
||||
|
||||
if not HAS_CREWAI_FILES:
|
||||
return v
|
||||
|
||||
result = []
|
||||
for item in v:
|
||||
if isinstance(item, str):
|
||||
@@ -1034,7 +1042,7 @@ Follow these guidelines:
|
||||
|
||||
Converts input_files list to a named dict and stores under task ID.
|
||||
"""
|
||||
if not self.input_files:
|
||||
if not HAS_CREWAI_FILES or not self.input_files:
|
||||
return
|
||||
|
||||
files_dict = normalize_input_files(self.input_files)
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
"""Types for CrewAI utilities."""
|
||||
|
||||
from typing import Any, Literal
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
from crewai_files import FileInput
|
||||
from typing_extensions import NotRequired, TypedDict
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import FileInput
|
||||
|
||||
|
||||
class LLMMessage(TypedDict):
|
||||
"""Type for formatted LLM messages.
|
||||
|
||||
@@ -19,6 +24,7 @@ class LLMMessage(TypedDict):
|
||||
tool_call_id: NotRequired[str]
|
||||
name: NotRequired[str]
|
||||
tool_calls: NotRequired[list[dict[str, Any]]]
|
||||
files: NotRequired[dict[str, FileInput]]
|
||||
|
||||
|
||||
class KickoffInputs(TypedDict, total=False):
|
||||
@@ -28,4 +34,4 @@ class KickoffInputs(TypedDict, total=False):
|
||||
files: Named file inputs accessible to tasks during execution.
|
||||
"""
|
||||
|
||||
files: dict[str, FileInput]
|
||||
files: dict[str, FileInput]
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What
|
||||
type of document is this? Answer in one word."},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stream":false}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
anthropic-version:
|
||||
- '2023-06-01'
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '748'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.anthropic.com
|
||||
x-api-key:
|
||||
- X-API-KEY-XXX
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 0.71.1
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.10
|
||||
x-stainless-timeout:
|
||||
- NOT_GIVEN
|
||||
method: POST
|
||||
uri: https://api.anthropic.com/v1/messages
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAA/3WQTUvEMBCG/8ucW2jr7rL25sKCKHrQiyASYjJsw6ZJzUxEKf3vTheLX3hKeJ8n
|
||||
8zIZoY8WPbRgvM4Wy7NyXXbaHXPZVM2qrpoGCnBWhJ4Oqqovd/nBnt92tF1dX+z3u6t7ffO8FYff
|
||||
B5wtJNIHlCBFPweayBHrwBKZGBjl1j6Oi8/4NpPT0cIdUu4RpqcCiOOgEmqKQQAGqzinAJ+A8CVj
|
||||
MDIhZO8LyKfSdgQXhsyK4xEDQVtvmo3UatOhMjKMXQzqp1ItXLD9jy1v5wYcOuwxaa/W/V//i9bd
|
||||
bzoVEDN/j1ayDqZXZ1CxwySLzl9ldbIwTR/rySkqnAEAAA==
|
||||
headers:
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Thu, 22 Jan 2026 00:18:50 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Robots-Tag:
|
||||
- none
|
||||
anthropic-organization-id:
|
||||
- ANTHROPIC-ORGANIZATION-ID-XXX
|
||||
anthropic-ratelimit-input-tokens-limit:
|
||||
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
|
||||
anthropic-ratelimit-input-tokens-remaining:
|
||||
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
|
||||
anthropic-ratelimit-input-tokens-reset:
|
||||
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
|
||||
anthropic-ratelimit-output-tokens-limit:
|
||||
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
|
||||
anthropic-ratelimit-output-tokens-remaining:
|
||||
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
|
||||
anthropic-ratelimit-output-tokens-reset:
|
||||
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
|
||||
anthropic-ratelimit-requests-limit:
|
||||
- '4000'
|
||||
anthropic-ratelimit-requests-remaining:
|
||||
- '3999'
|
||||
anthropic-ratelimit-requests-reset:
|
||||
- '2026-01-22T00:18:50Z'
|
||||
anthropic-ratelimit-tokens-limit:
|
||||
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
|
||||
anthropic-ratelimit-tokens-remaining:
|
||||
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
|
||||
anthropic-ratelimit-tokens-reset:
|
||||
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
request-id:
|
||||
- REQUEST-ID-XXX
|
||||
strict-transport-security:
|
||||
- STS-XXX
|
||||
x-envoy-upstream-service-time:
|
||||
- '750'
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,104 +0,0 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What
|
||||
type of document is this? Answer in one word."},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stream":false}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
anthropic-version:
|
||||
- '2023-06-01'
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '748'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.anthropic.com
|
||||
x-api-key:
|
||||
- X-API-KEY-XXX
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 0.71.1
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.10
|
||||
x-stainless-timeout:
|
||||
- NOT_GIVEN
|
||||
method: POST
|
||||
uri: https://api.anthropic.com/v1/messages
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAA/3WQTUvEMBCG/8ucW2hju4eeRUU97EFRFAkhGbZh06Qmk1Up/e9OF4tf7CnhfZ7J
|
||||
y2SCIRh00IF2Khssz8q27JXd51JUoqkrIaAAa1gY0k5W9bbptXo7PD60l/V1f/V0J+5vxQ079DHi
|
||||
YmFKaoccxOCWQKVkEylPHOngCfnWPU+rT/i+kOPRwfb8AuaXAhKFUUZUKXhO0RtJOXr4AglfM3rN
|
||||
4z47V0A+NnYTWD9mkhT26BN09UZsuFPpHqXmx8gGL38r1coZm1NsnV0acOxxwKicbIf//jet+790
|
||||
LiBk+hk1vA7Gg9UoyWLkRZd/MioamOdP24g1JZkBAAA=
|
||||
headers:
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Thu, 22 Jan 2026 00:18:56 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Robots-Tag:
|
||||
- none
|
||||
anthropic-organization-id:
|
||||
- ANTHROPIC-ORGANIZATION-ID-XXX
|
||||
anthropic-ratelimit-input-tokens-limit:
|
||||
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
|
||||
anthropic-ratelimit-input-tokens-remaining:
|
||||
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
|
||||
anthropic-ratelimit-input-tokens-reset:
|
||||
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
|
||||
anthropic-ratelimit-output-tokens-limit:
|
||||
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
|
||||
anthropic-ratelimit-output-tokens-remaining:
|
||||
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
|
||||
anthropic-ratelimit-output-tokens-reset:
|
||||
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
|
||||
anthropic-ratelimit-requests-limit:
|
||||
- '4000'
|
||||
anthropic-ratelimit-requests-remaining:
|
||||
- '3999'
|
||||
anthropic-ratelimit-requests-reset:
|
||||
- '2026-01-22T00:18:55Z'
|
||||
anthropic-ratelimit-tokens-limit:
|
||||
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
|
||||
anthropic-ratelimit-tokens-remaining:
|
||||
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
|
||||
anthropic-ratelimit-tokens-reset:
|
||||
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
request-id:
|
||||
- REQUEST-ID-XXX
|
||||
strict-transport-security:
|
||||
- STS-XXX
|
||||
x-envoy-upstream-service-time:
|
||||
- '648'
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@ from unittest.mock import patch
|
||||
import pytest
|
||||
|
||||
from crewai.llm import LLM
|
||||
from crewai.files import ImageFile, PDFFile, TextFile
|
||||
from crewai_files import ImageFile, PDFFile, TextFile, format_multimodal_content
|
||||
|
||||
# Check for optional provider dependencies
|
||||
try:
|
||||
@@ -124,27 +124,18 @@ class TestLiteLLMMultimodal:
|
||||
llm = LLM(model="gpt-4o", is_litellm=True)
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["type"] == "image_url"
|
||||
assert "data:image/png;base64," in result[0]["image_url"]["url"]
|
||||
|
||||
def test_format_multimodal_content_non_multimodal(self) -> None:
|
||||
"""Test non-multimodal model returns empty list."""
|
||||
llm = LLM(model="gpt-3.5-turbo", is_litellm=True)
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
|
||||
assert result == []
|
||||
|
||||
def test_format_multimodal_content_unsupported_type(self) -> None:
|
||||
"""Test unsupported content type is skipped."""
|
||||
llm = LLM(model="gpt-4o", is_litellm=True) # OpenAI doesn't support PDF
|
||||
files = {"doc": PDFFile(source=MINIMAL_PDF)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert result == []
|
||||
|
||||
@@ -175,7 +166,7 @@ class TestAnthropicMultimodal:
|
||||
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["type"] == "image"
|
||||
@@ -188,7 +179,7 @@ class TestAnthropicMultimodal:
|
||||
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
|
||||
files = {"doc": PDFFile(source=MINIMAL_PDF)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["type"] == "document"
|
||||
@@ -230,7 +221,7 @@ class TestOpenAIMultimodal:
|
||||
llm = LLM(model="openai/gpt-4o")
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["type"] == "image_url"
|
||||
@@ -264,7 +255,7 @@ class TestGeminiMultimodal:
|
||||
llm = LLM(model="gemini/gemini-pro")
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert "inlineData" in result[0]
|
||||
@@ -321,7 +312,7 @@ class TestAzureMultimodal:
|
||||
llm = LLM(model="azure/gpt-4o")
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["type"] == "image_url"
|
||||
@@ -357,7 +348,7 @@ class TestBedrockMultimodal:
|
||||
"""Test Bedrock supports images and PDFs."""
|
||||
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
|
||||
types = llm.supported_multimodal_content_types()
|
||||
assert "image/" in types
|
||||
assert any(t.startswith("image/") for t in types)
|
||||
assert "application/pdf" in types
|
||||
|
||||
def test_format_multimodal_content_image(self) -> None:
|
||||
@@ -365,7 +356,7 @@ class TestBedrockMultimodal:
|
||||
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert "image" in result[0]
|
||||
@@ -378,7 +369,7 @@ class TestBedrockMultimodal:
|
||||
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
|
||||
files = {"doc": PDFFile(source=MINIMAL_PDF)}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert "document" in result[0]
|
||||
@@ -411,18 +402,6 @@ class TestBaseLLMMultimodal:
|
||||
llm = TestLLM(model="test")
|
||||
assert llm.supported_multimodal_content_types() == []
|
||||
|
||||
def test_base_format_multimodal_content_empty(self) -> None:
|
||||
"""Test base implementation returns empty list."""
|
||||
from crewai.llms.base_llm import BaseLLM
|
||||
|
||||
class TestLLM(BaseLLM):
|
||||
def call(self, messages, tools=None, callbacks=None):
|
||||
return "test"
|
||||
|
||||
llm = TestLLM(model="test")
|
||||
files = {"chart": ImageFile(source=MINIMAL_PNG)}
|
||||
assert llm.format_multimodal_content(files) == []
|
||||
|
||||
def test_base_format_text_content(self) -> None:
|
||||
"""Test base text formatting uses OpenAI/Anthropic style."""
|
||||
from crewai.llms.base_llm import BaseLLM
|
||||
@@ -447,7 +426,7 @@ class TestMultipleFilesFormatting:
|
||||
"chart2": ImageFile(source=MINIMAL_PNG),
|
||||
}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 2
|
||||
|
||||
@@ -460,7 +439,7 @@ class TestMultipleFilesFormatting:
|
||||
"text": TextFile(source=b"hello"), # Not supported
|
||||
}
|
||||
|
||||
result = llm.format_multimodal_content(files)
|
||||
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["type"] == "image_url"
|
||||
@@ -469,6 +448,6 @@ class TestMultipleFilesFormatting:
|
||||
"""Test empty files dict returns empty list."""
|
||||
llm = LLM(model="gpt-4o")
|
||||
|
||||
result = llm.format_multimodal_content({})
|
||||
result = format_multimodal_content({}, llm.model)
|
||||
|
||||
assert result == []
|
||||
@@ -9,13 +9,13 @@ from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from crewai.llm import LLM
|
||||
from crewai.files import File, ImageFile, PDFFile, TextFile
|
||||
from crewai_files import File, ImageFile, PDFFile, TextFile, format_multimodal_content
|
||||
|
||||
|
||||
# Path to test data files
|
||||
TEST_DATA_DIR = Path(__file__).parent.parent.parent.parent.parent / "data"
|
||||
TEST_IMAGE_PATH = TEST_DATA_DIR / "revenue_chart.png"
|
||||
TEST_TEXT_PATH = TEST_DATA_DIR / "review_guidelines.txt"
|
||||
TEST_FIXTURES_DIR = Path(__file__).parent.parent.parent.parent / "crewai-files" / "tests" / "fixtures"
|
||||
TEST_IMAGE_PATH = TEST_FIXTURES_DIR / "revenue_chart.png"
|
||||
TEST_TEXT_PATH = TEST_FIXTURES_DIR / "review_guidelines.txt"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -50,7 +50,8 @@ startxref
|
||||
|
||||
def _build_multimodal_message(llm: LLM, prompt: str, files: dict) -> list[dict]:
|
||||
"""Build a multimodal message with text and file content."""
|
||||
content_blocks = llm.format_multimodal_content(files)
|
||||
provider = getattr(llm, "provider", None) or llm.model
|
||||
content_blocks = format_multimodal_content(files, provider)
|
||||
return [
|
||||
{
|
||||
"role": "user",
|
||||
@@ -124,6 +125,68 @@ class TestAnthropicMultimodalIntegration:
|
||||
assert len(response) > 0
|
||||
|
||||
|
||||
class TestAzureMultimodalIntegration:
|
||||
"""Integration tests for Azure OpenAI multimodal with real API calls."""
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_describe_image(self, test_image_bytes: bytes) -> None:
|
||||
"""Test Azure OpenAI can describe an image."""
|
||||
llm = LLM(model="azure/gpt-4o")
|
||||
files = {"image": ImageFile(source=test_image_bytes)}
|
||||
|
||||
messages = _build_multimodal_message(
|
||||
llm,
|
||||
"Describe this image in one sentence. Be brief.",
|
||||
files,
|
||||
)
|
||||
|
||||
response = llm.call(messages)
|
||||
|
||||
assert response
|
||||
assert isinstance(response, str)
|
||||
assert len(response) > 0
|
||||
|
||||
|
||||
class TestBedrockMultimodalIntegration:
|
||||
"""Integration tests for AWS Bedrock multimodal with real API calls."""
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_describe_image(self, test_image_bytes: bytes) -> None:
|
||||
"""Test Bedrock Claude can describe an image."""
|
||||
llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
|
||||
files = {"image": ImageFile(source=test_image_bytes)}
|
||||
|
||||
messages = _build_multimodal_message(
|
||||
llm,
|
||||
"Describe this image in one sentence. Be brief.",
|
||||
files,
|
||||
)
|
||||
|
||||
response = llm.call(messages)
|
||||
|
||||
assert response
|
||||
assert isinstance(response, str)
|
||||
assert len(response) > 0
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_analyze_pdf(self) -> None:
|
||||
"""Test Bedrock Claude can analyze a PDF."""
|
||||
llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
|
||||
files = {"document": PDFFile(source=MINIMAL_PDF)}
|
||||
|
||||
messages = _build_multimodal_message(
|
||||
llm,
|
||||
"What type of document is this? Answer in one word.",
|
||||
files,
|
||||
)
|
||||
|
||||
response = llm.call(messages)
|
||||
|
||||
assert response
|
||||
assert isinstance(response, str)
|
||||
assert len(response) > 0
|
||||
|
||||
|
||||
class TestGeminiMultimodalIntegration:
|
||||
"""Integration tests for Gemini multimodal with real API calls."""
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import base64
|
||||
import pytest
|
||||
|
||||
from crewai.tools.agent_tools.read_file_tool import ReadFileTool
|
||||
from crewai.files import ImageFile, PDFFile, TextFile
|
||||
from crewai_files import ImageFile, PDFFile, TextFile
|
||||
|
||||
|
||||
class TestReadFileTool:
|
||||
|
||||
@@ -13,7 +13,7 @@ from crewai.utilities.file_store import (
|
||||
store_files,
|
||||
store_task_files,
|
||||
)
|
||||
from crewai.files import TextFile
|
||||
from crewai_files import TextFile
|
||||
|
||||
|
||||
class TestFileStore:
|
||||
|
||||
@@ -6,7 +6,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.files import (
|
||||
from crewai_files import (
|
||||
AudioFile,
|
||||
File,
|
||||
FileBytes,
|
||||
@@ -20,7 +20,7 @@ from crewai.files import (
|
||||
normalize_input_files,
|
||||
wrap_file_source,
|
||||
)
|
||||
from crewai.files.file import detect_content_type
|
||||
from crewai_files.core.sources import detect_content_type
|
||||
|
||||
|
||||
class TestDetectContentType:
|
||||
|
||||
Reference in New Issue
Block a user