mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 00:02:36 +00:00
refactor: centralize multimodal formatting in crewai_files
This commit is contained in:
@@ -10,7 +10,7 @@ from collections.abc import Callable
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any, Literal, cast
|
||||
|
||||
from crewai_files import FileProcessor
|
||||
from crewai_files import aformat_multimodal_content, format_multimodal_content
|
||||
from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
|
||||
from pydantic_core import CoreSchema, core_schema
|
||||
|
||||
@@ -220,9 +220,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
"""Inject files as multimodal content into messages.
|
||||
|
||||
For crews with input files and LLMs that support multimodal,
|
||||
processes files according to provider constraints and file handling mode,
|
||||
then delegates to the LLM's format_multimodal_content method to
|
||||
generate provider-specific content blocks.
|
||||
uses crewai_files to process, resolve, and format files into
|
||||
provider-specific content blocks.
|
||||
"""
|
||||
if not self.crew or not self.task:
|
||||
return
|
||||
@@ -235,15 +234,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
return
|
||||
|
||||
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
||||
processor = FileProcessor(constraints=provider)
|
||||
files = processor.process_files(files)
|
||||
content_blocks = format_multimodal_content(files, provider)
|
||||
|
||||
from crewai_files import get_upload_cache
|
||||
|
||||
upload_cache = get_upload_cache()
|
||||
content_blocks = self.llm.format_multimodal_content(
|
||||
files, upload_cache=upload_cache
|
||||
)
|
||||
if not content_blocks:
|
||||
return
|
||||
|
||||
@@ -262,9 +254,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
"""Async inject files as multimodal content into messages.
|
||||
|
||||
For crews with input files and LLMs that support multimodal,
|
||||
processes files according to provider constraints using parallel processing,
|
||||
then delegates to the LLM's aformat_multimodal_content method to
|
||||
generate provider-specific content blocks with parallel file resolution.
|
||||
uses crewai_files to process, resolve, and format files into
|
||||
provider-specific content blocks with parallel file resolution.
|
||||
"""
|
||||
if not self.crew or not self.task:
|
||||
return
|
||||
@@ -277,15 +268,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
return
|
||||
|
||||
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
||||
processor = FileProcessor(constraints=provider)
|
||||
files = await processor.aprocess_files(files)
|
||||
content_blocks = await aformat_multimodal_content(files, provider)
|
||||
|
||||
from crewai_files import get_upload_cache
|
||||
|
||||
upload_cache = get_upload_cache()
|
||||
content_blocks = await self.llm.aformat_multimodal_content(
|
||||
files, upload_cache=upload_cache
|
||||
)
|
||||
if not content_blocks:
|
||||
return
|
||||
|
||||
|
||||
@@ -53,7 +53,6 @@ from crewai.utilities.logger_utils import suppress_warnings
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import FileInput, UploadCache
|
||||
from litellm.exceptions import ContextWindowExceededError
|
||||
from litellm.litellm_core_utils.get_supported_openai_params import (
|
||||
get_supported_openai_params,
|
||||
@@ -2254,66 +2253,3 @@ class LLM(BaseLLM):
|
||||
if "claude-3" in model_lower or "claude-4" in model_lower:
|
||||
return ["image/", "application/pdf"]
|
||||
return ["image/"]
|
||||
|
||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Turn input files into OpenAI-style ``image_url`` blocks for litellm.

    Every file whose content type starts with one of the prefixes returned
    by ``supported_multimodal_content_types()`` is resolved through a
    ``FileResolver`` created with ``prefer_upload=False`` and emitted as a
    base64 ``data:`` URL.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        One ``image_url`` block per accepted file, or an empty list when
        the model reports no multimodal support.
    """
    import base64

    from crewai_files import (
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    if not self.supports_multimodal():
        return []

    accepted_prefixes = self.supported_multimodal_content_types()
    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )

    blocks: list[dict[str, Any]] = []
    for attachment in files.values():
        mime = attachment.content_type
        if not any(mime.startswith(prefix) for prefix in accepted_prefixes):
            continue

        outcome = resolver.resolve(attachment, "openai")
        if isinstance(outcome, InlineBase64):
            # Use the resolver's own content type and payload when inline.
            mime, payload = outcome.content_type, outcome.data
        else:
            # Any other resolution result: encode the raw file ourselves.
            payload = base64.b64encode(attachment.read()).decode("ascii")

        blocks.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:{mime};base64,{payload}"},
            }
        )

    return blocks
|
||||
|
||||
@@ -32,8 +32,6 @@ from crewai.types.usage_metrics import UsageMetrics
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import FileInput, UploadCache
|
||||
|
||||
from crewai.agent.core import Agent
|
||||
from crewai.task import Task
|
||||
from crewai.tools.base_tool import BaseTool
|
||||
@@ -298,43 +296,6 @@ class BaseLLM(ABC):
|
||||
"""
|
||||
return []
|
||||
|
||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Return multimodal content blocks for ``files``.

    The base implementation produces no blocks; provider subclasses
    override this method with their own formatting.

    Args:
        files: Mapping of file name to FileInput object (unused here).
        upload_cache: Optional cache for tracking uploaded files (unused here).

    Returns:
        Always an empty list in the base class.
    """
    no_blocks: list[dict[str, Any]] = []
    return no_blocks
|
||||
|
||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Async counterpart of ``format_multimodal_content``.

    This default simply delegates to the synchronous method; subclasses
    can override it to resolve files asynchronously.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache for tracking uploaded files.

    Returns:
        Whatever ``format_multimodal_content`` returns for the same inputs.
    """
    blocks = self.format_multimodal_content(files, upload_cache)
    return blocks
|
||||
|
||||
def format_text_content(self, text: str) -> dict[str, Any]:
|
||||
"""Format text as a content block for the LLM.
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -20,12 +19,8 @@ from crewai.utilities.types import LLMMessage
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import FileInput, UploadCache
|
||||
|
||||
from crewai.llms.hooks.base import BaseInterceptor
|
||||
|
||||
DEFAULT_CACHE_TTL = "ephemeral"
|
||||
|
||||
try:
|
||||
from anthropic import Anthropic, AsyncAnthropic
|
||||
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
|
||||
@@ -1256,222 +1251,3 @@ class AnthropicCompletion(BaseLLM):
|
||||
if not self.supports_multimodal():
|
||||
return []
|
||||
return ["image/", "application/pdf"]
|
||||
|
||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
    enable_caching: bool = True,
    cache_ttl: str | None = None,
) -> list[dict[str, Any]]:
    """Format files as Anthropic multimodal content blocks.

    Images become ``image`` blocks and PDFs become ``document`` blocks.
    The block source is a file reference when the resolver returns a
    ``FileReference``, otherwise inline base64 (from the resolver, or
    encoded here as a fallback).

    When caching is enabled, a ``cache_control`` marker is attached to the
    last block that is actually emitted, so the marker is not lost when
    trailing files are filtered out.

    Args:
        files: Dictionary mapping file names to FileInput objects.
        upload_cache: Optional cache handed to the FileResolver.
        enable_caching: Whether to add a cache_control marker (default: True).
        cache_ttl: Cache TTL - "ephemeral"/None for the default lifetime, or
            an explicit TTL such as "1h".

    Returns:
        List of content blocks in Anthropic's expected format.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    def _block_kind(mime: str) -> str | None:
        # Map a content type to the Anthropic block type, if one exists.
        if mime.startswith("image/"):
            return "image"
        if mime == "application/pdf":
            return "document"
        return None

    content_blocks: list[dict[str, Any]] = []
    supported_types = self.supported_multimodal_content_types()

    config = FileResolverConfig(prefer_upload=False)
    resolver = FileResolver(config=config, upload_cache=upload_cache)

    for file_input in files.values():
        content_type = file_input.content_type
        if not any(content_type.startswith(t) for t in supported_types):
            continue

        kind = _block_kind(content_type)
        if kind is None:
            # No block can represent this file; skip before resolving it.
            continue

        resolved = resolver.resolve(file_input, "anthropic")

        source: dict[str, Any]
        if isinstance(resolved, FileReference):
            source = {"type": "file", "file_id": resolved.file_id}
        elif isinstance(resolved, InlineBase64):
            source = {
                "type": "base64",
                "media_type": resolved.content_type,
                "data": resolved.data,
            }
        else:
            # Fallback: encode the raw file contents ourselves.
            source = {
                "type": "base64",
                "media_type": content_type,
                "data": base64.b64encode(file_input.read()).decode("ascii"),
            }

        content_blocks.append({"type": kind, "source": source})

    if enable_caching and content_blocks:
        # Anthropic's cache_control type is always "ephemeral"; a longer
        # lifetime is requested through the separate "ttl" field.
        ttl = cache_ttl or DEFAULT_CACHE_TTL
        cache_control: dict[str, str] = {"type": "ephemeral"}
        if ttl != "ephemeral":
            cache_control["ttl"] = ttl
        content_blocks[-1]["cache_control"] = cache_control

    return content_blocks
|
||||
|
||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
    enable_caching: bool = True,
    cache_ttl: str | None = None,
) -> list[dict[str, Any]]:
    """Async format files as Anthropic multimodal content blocks.

    Supported files are resolved in one ``aresolve_files`` call. Images
    become ``image`` blocks and PDFs become ``document`` blocks, sourced
    either from a file reference or from inline base64. Files whose
    resolution yields neither form are omitted.

    When caching is enabled, a ``cache_control`` marker is attached to the
    last block that is actually emitted, so the marker is not lost when
    some files are filtered out or fail to resolve.

    Args:
        files: Dictionary mapping file names to FileInput objects.
        upload_cache: Optional cache handed to the FileResolver.
        enable_caching: Whether to add a cache_control marker (default: True).
        cache_ttl: Cache TTL - "ephemeral"/None for the default lifetime, or
            an explicit TTL such as "1h".

    Returns:
        List of content blocks in Anthropic's expected format.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    def _block_kind(mime: str) -> str | None:
        # Map a content type to the Anthropic block type, if one exists.
        if mime.startswith("image/"):
            return "image"
        if mime == "application/pdf":
            return "document"
        return None

    supported_types = self.supported_multimodal_content_types()

    # Keep only files that pass the provider filter AND can be expressed as
    # an image/document block, so nothing is resolved needlessly.
    supported_files = {
        name: f
        for name, f in files.items()
        if any(f.content_type.startswith(t) for t in supported_types)
        and _block_kind(f.content_type) is not None
    }

    if not supported_files:
        return []

    config = FileResolverConfig(prefer_upload=False)
    resolver = FileResolver(config=config, upload_cache=upload_cache)
    resolved_files = await resolver.aresolve_files(supported_files, "anthropic")

    content_blocks: list[dict[str, Any]] = []

    for name, file_input in supported_files.items():
        if name not in resolved_files:
            continue

        resolved = resolved_files[name]

        source: dict[str, Any]
        if isinstance(resolved, FileReference):
            source = {"type": "file", "file_id": resolved.file_id}
        elif isinstance(resolved, InlineBase64):
            source = {
                "type": "base64",
                "media_type": resolved.content_type,
                "data": resolved.data,
            }
        else:
            # Unknown resolution result: nothing to emit for this file.
            continue

        content_blocks.append(
            {"type": _block_kind(file_input.content_type), "source": source}
        )

    if enable_caching and content_blocks:
        # Anthropic's cache_control type is always "ephemeral"; a longer
        # lifetime is requested through the separate "ttl" field.
        ttl = cache_ttl or DEFAULT_CACHE_TTL
        cache_control: dict[str, str] = {"type": "ephemeral"}
        if ttl != "ephemeral":
            cache_control["ttl"] = ttl
        content_blocks[-1]["cache_control"] = cache_control

    return content_blocks
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -18,8 +17,6 @@ from crewai.utilities.types import LLMMessage
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import FileInput, UploadCache
|
||||
|
||||
from crewai.llms.hooks.base import BaseInterceptor
|
||||
|
||||
|
||||
@@ -1040,115 +1037,3 @@ class AzureCompletion(BaseLLM):
|
||||
if not self.supports_multimodal():
|
||||
return []
|
||||
return ["image/"]
|
||||
|
||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Build Azure OpenAI ``image_url`` blocks from the given files.

    Files are filtered by ``supported_multimodal_content_types()``,
    resolved with a ``FileResolver`` created with ``prefer_upload=False``,
    and emitted as base64 ``data:`` URLs.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        One ``image_url`` block per accepted file, or an empty list when
        the model reports no multimodal support.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    accepted_prefixes = self.supported_multimodal_content_types()

    # Inline delivery only: the resolver is told not to prefer uploads.
    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )

    blocks: list[dict[str, Any]] = []
    for attachment in files.values():
        mime = attachment.content_type
        if not any(mime.startswith(prefix) for prefix in accepted_prefixes):
            continue

        outcome = resolver.resolve(attachment, "azure")
        if isinstance(outcome, InlineBase64):
            mime, payload = outcome.content_type, outcome.data
        else:
            # Any other resolution result: encode the raw file ourselves.
            payload = base64.b64encode(attachment.read()).decode("ascii")

        blocks.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:{mime};base64,{payload}"},
            }
        )

    return blocks
|
||||
|
||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Async variant: build Azure OpenAI ``image_url`` blocks from files.

    Supported files are resolved together through
    ``FileResolver.aresolve_files``; only results that come back as
    ``InlineBase64`` are turned into blocks.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        List of ``image_url`` blocks, possibly empty.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    accepted_prefixes = self.supported_multimodal_content_types()

    candidates = {}
    for name, attachment in files.items():
        if any(attachment.content_type.startswith(p) for p in accepted_prefixes):
            candidates[name] = attachment

    if not candidates:
        return []

    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )
    outcomes = await resolver.aresolve_files(candidates, "azure")

    blocks: list[dict[str, Any]] = []
    for outcome in outcomes.values():
        if not isinstance(outcome, InlineBase64):
            # Non-inline results have no representation here.
            continue
        blocks.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{outcome.content_type};base64,{outcome.data}"
                },
            }
        )
    return blocks
|
||||
|
||||
@@ -20,7 +20,6 @@ from crewai.utilities.types import LLMMessage
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import FileInput, UploadCache
|
||||
from mypy_boto3_bedrock_runtime.type_defs import (
|
||||
GuardrailConfigurationTypeDef,
|
||||
GuardrailStreamConfigurationTypeDef,
|
||||
@@ -1563,260 +1562,3 @@ class BedrockCompletion(BaseLLM):
|
||||
"video/3gpp": "three_gp",
|
||||
}
|
||||
return format_map.get(content_type)
|
||||
|
||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Build Bedrock Converse content blocks (image/video/document) from files.

    Each file is resolved via ``FileResolver``. A ``FileReference`` with a
    ``file_uri`` becomes an ``s3Location`` source; anything else is sent as
    raw bytes. Uploads are preferred only when ``CREWAI_BEDROCK_S3_BUCKET``
    is set and ``_is_nova_model()`` is truthy.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        List of content blocks. Video and document files whose format
        lookup returns nothing are left out.
    """
    if not self.supports_multimodal():
        return []

    import os

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBytes,
    )

    blocks: list[dict[str, Any]] = []
    nova = self._is_nova_model()

    bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
    bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
    want_upload = bool(bucket) and nova

    resolver = FileResolver(
        config=FileResolverConfig(
            prefer_upload=want_upload, use_bytes_for_bedrock=True
        ),
        upload_cache=upload_cache,
    )

    for name, attachment in files.items():
        mime = attachment.content_type
        outcome = resolver.resolve(attachment, "bedrock")

        # Step 1: decide where the payload comes from.
        source: dict[str, Any]
        if isinstance(outcome, FileReference) and outcome.file_uri:
            location: dict[str, Any] = {"uri": outcome.file_uri}
            if bucket_owner:
                location["bucketOwner"] = bucket_owner
            source = {"s3Location": location}
        elif isinstance(outcome, InlineBytes):
            source = {"bytes": outcome.data}
        else:
            source = {"bytes": attachment.read()}

        # Step 2: wrap the source in the block matching the content type.
        if mime.startswith("image/"):
            image_format = mime.rsplit("/", 1)[-1]
            if image_format == "jpg":
                image_format = "jpeg"
            blocks.append({"image": {"format": image_format, "source": source}})
            continue

        if mime.startswith("video/"):
            video_format = self._get_video_format(mime)
            if video_format:
                blocks.append(
                    {"video": {"format": video_format, "source": source}}
                )
            continue

        document_format = self._get_document_format(mime)
        if document_format:
            blocks.append(
                {
                    "document": {
                        "name": name,
                        "format": document_format,
                        "source": source,
                    }
                }
            )

    return blocks
|
||||
|
||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Async variant: build Bedrock Converse content blocks from files.

    All files are resolved in one ``aresolve_files`` call. A
    ``FileReference`` with a ``file_uri`` becomes an ``s3Location`` source;
    anything else is sent as raw bytes (read asynchronously when the
    resolver did not return ``InlineBytes``). Uploads are preferred only
    when ``CREWAI_BEDROCK_S3_BUCKET`` is set and ``_is_nova_model()`` is
    truthy.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        List of content blocks. Video and document files whose format
        lookup returns nothing are left out.
    """
    if not self.supports_multimodal():
        return []

    import os

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBytes,
    )

    nova = self._is_nova_model()
    bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
    bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
    want_upload = bool(bucket) and nova

    resolver = FileResolver(
        config=FileResolverConfig(
            prefer_upload=want_upload, use_bytes_for_bedrock=True
        ),
        upload_cache=upload_cache,
    )
    outcomes = await resolver.aresolve_files(files, "bedrock")

    blocks: list[dict[str, Any]] = []
    for name, outcome in outcomes.items():
        attachment = files[name]
        mime = attachment.content_type

        # Step 1: decide where the payload comes from.
        source: dict[str, Any]
        if isinstance(outcome, FileReference) and outcome.file_uri:
            location: dict[str, Any] = {"uri": outcome.file_uri}
            if bucket_owner:
                location["bucketOwner"] = bucket_owner
            source = {"s3Location": location}
        elif isinstance(outcome, InlineBytes):
            source = {"bytes": outcome.data}
        else:
            source = {"bytes": await attachment.aread()}

        # Step 2: wrap the source in the block matching the content type.
        if mime.startswith("image/"):
            image_format = mime.rsplit("/", 1)[-1]
            if image_format == "jpg":
                image_format = "jpeg"
            blocks.append({"image": {"format": image_format, "source": source}})
            continue

        if mime.startswith("video/"):
            video_format = self._get_video_format(mime)
            if video_format:
                blocks.append(
                    {"video": {"format": video_format, "source": source}}
                )
            continue

        document_format = self._get_document_format(mime)
        if document_format:
            blocks.append(
                {
                    "document": {
                        "name": name,
                        "format": document_format,
                        "source": source,
                    }
                }
            )

    return blocks
|
||||
|
||||
@@ -19,11 +19,6 @@ from crewai.utilities.types import LLMMessage
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import (
|
||||
FileInput,
|
||||
UploadCache,
|
||||
)
|
||||
|
||||
from crewai.llms.hooks.base import BaseInterceptor
|
||||
|
||||
|
||||
@@ -1097,138 +1092,6 @@ class GeminiCompletion(BaseLLM):
|
||||
"""
|
||||
return ["image/", "audio/", "video/", "application/pdf", "text/"]
|
||||
|
||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Build Gemini ``fileData`` / ``inlineData`` blocks from the given files.

    Files are filtered by ``supported_multimodal_content_types()`` and
    resolved with a ``FileResolver`` created with ``prefer_upload=False``.
    A ``FileReference`` carrying a ``file_uri`` becomes ``fileData``; every
    other result is sent as ``inlineData``.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        List of content blocks, one per accepted file.
    """
    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    blocks: list[dict[str, Any]] = []
    accepted_prefixes = self.supported_multimodal_content_types()

    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )

    for attachment in files.values():
        mime = attachment.content_type
        if not any(mime.startswith(prefix) for prefix in accepted_prefixes):
            continue

        outcome = resolver.resolve(attachment, "gemini")

        block: dict[str, Any]
        if isinstance(outcome, FileReference) and outcome.file_uri:
            # Uploaded file: point at it by URI.
            block = {
                "fileData": {
                    "mimeType": outcome.content_type,
                    "fileUri": outcome.file_uri,
                }
            }
        elif isinstance(outcome, InlineBase64):
            block = {
                "inlineData": {
                    "mimeType": outcome.content_type,
                    "data": outcome.data,
                }
            }
        else:
            # Fallback: encode the raw file contents ourselves.
            encoded = base64.b64encode(attachment.read()).decode("ascii")
            block = {"inlineData": {"mimeType": mime, "data": encoded}}

        blocks.append(block)

    return blocks
|
||||
|
||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Async variant: build Gemini ``fileData`` / ``inlineData`` blocks.

    Supported files are resolved together through
    ``FileResolver.aresolve_files``. A ``FileReference`` carrying a
    ``file_uri`` becomes ``fileData``, an ``InlineBase64`` becomes
    ``inlineData``, and any other result is omitted.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        List of content blocks, possibly empty.
    """
    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    accepted_prefixes = self.supported_multimodal_content_types()

    candidates = {}
    for name, attachment in files.items():
        if any(attachment.content_type.startswith(p) for p in accepted_prefixes):
            candidates[name] = attachment

    if not candidates:
        return []

    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )
    outcomes = await resolver.aresolve_files(candidates, "gemini")

    blocks: list[dict[str, Any]] = []
    for outcome in outcomes.values():
        if isinstance(outcome, FileReference) and outcome.file_uri:
            payload = {
                "fileData": {
                    "mimeType": outcome.content_type,
                    "fileUri": outcome.file_uri,
                }
            }
        elif isinstance(outcome, InlineBase64):
            payload = {
                "inlineData": {
                    "mimeType": outcome.content_type,
                    "data": outcome.data,
                }
            }
        else:
            continue
        blocks.append(payload)

    return blocks
|
||||
|
||||
def format_text_content(self, text: str) -> dict[str, Any]:
|
||||
"""Format text as a Gemini content block.
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from collections.abc import AsyncIterator
|
||||
import json
|
||||
import logging
|
||||
@@ -27,8 +26,6 @@ from crewai.utilities.types import LLMMessage
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files import FileInput, UploadCache
|
||||
|
||||
from crewai.agent.core import Agent
|
||||
from crewai.llms.hooks.base import BaseInterceptor
|
||||
from crewai.task import Task
|
||||
@@ -1080,136 +1077,3 @@ class OpenAICompletion(BaseLLM):
|
||||
if not self.supports_multimodal():
|
||||
return []
|
||||
return ["image/"]
|
||||
|
||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Build OpenAI ``file`` / ``image_url`` blocks from the given files.

    Files are filtered by ``supported_multimodal_content_types()`` and
    resolved with a ``FileResolver`` created with ``prefer_upload=False``.
    A ``FileReference`` becomes a ``file`` block keyed by ``file_id``;
    every other result is sent as a base64 ``data:`` URL.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        List of content blocks, or an empty list when the model reports no
        multimodal support.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    blocks: list[dict[str, Any]] = []
    accepted_prefixes = self.supported_multimodal_content_types()

    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )

    for attachment in files.values():
        mime = attachment.content_type
        if not any(mime.startswith(prefix) for prefix in accepted_prefixes):
            continue

        outcome = resolver.resolve(attachment, "openai")

        if isinstance(outcome, FileReference):
            blocks.append({"type": "file", "file": {"file_id": outcome.file_id}})
            continue

        if isinstance(outcome, InlineBase64):
            mime, payload = outcome.content_type, outcome.data
        else:
            # Fallback: encode the raw file contents ourselves.
            payload = base64.b64encode(attachment.read()).decode("ascii")

        blocks.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:{mime};base64,{payload}"},
            }
        )

    return blocks
|
||||
|
||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Async variant: build OpenAI ``file`` / ``image_url`` blocks from files.

    Supported files are resolved together through
    ``FileResolver.aresolve_files``. A ``FileReference`` becomes a ``file``
    block, an ``InlineBase64`` becomes an ``image_url`` data URL, and any
    other result is omitted.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional cache handed to the FileResolver.

    Returns:
        List of content blocks, possibly empty.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    accepted_prefixes = self.supported_multimodal_content_types()

    candidates = {}
    for name, attachment in files.items():
        if any(attachment.content_type.startswith(p) for p in accepted_prefixes):
            candidates[name] = attachment

    if not candidates:
        return []

    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )
    outcomes = await resolver.aresolve_files(candidates, "openai")

    blocks: list[dict[str, Any]] = []
    for outcome in outcomes.values():
        if isinstance(outcome, FileReference):
            payload = {"type": "file", "file": {"file_id": outcome.file_id}}
        elif isinstance(outcome, InlineBase64):
            payload = {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{outcome.content_type};base64,{outcome.data}"
                },
            }
        else:
            continue
        blocks.append(payload)

    return blocks
|
||||
|
||||
Reference in New Issue
Block a user