mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-28 17:48:13 +00:00
refactor: centralize multimodal formatting in crewai_files
This commit is contained in:
@@ -1,10 +1,10 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "crewai-files"
|
name = "crewai-files"
|
||||||
version = "0.1.0"
|
dynamic = ["version"]
|
||||||
description = "Add your description here"
|
description = "Add your description here"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "Greyson LaLonde", email = "greyson.r.lalonde@gmail.com" }
|
{ name = "Greyson LaLonde", email = "greyson@crewai.com" }
|
||||||
]
|
]
|
||||||
requires-python = ">=3.10, <3.14"
|
requires-python = ">=3.10, <3.14"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
@@ -17,9 +17,9 @@ dependencies = [
|
|||||||
"av~=13.0.0",
|
"av~=13.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
|
||||||
crewai-files = "crewai_files:main"
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["hatchling"]
|
requires = ["hatchling"]
|
||||||
build-backend = "hatchling.build"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.version]
|
||||||
|
path = "src/crewai_files/__init__.py"
|
||||||
|
|||||||
@@ -49,6 +49,10 @@ from crewai_files.core.types import (
|
|||||||
VideoFile,
|
VideoFile,
|
||||||
VideoMimeType,
|
VideoMimeType,
|
||||||
)
|
)
|
||||||
|
from crewai_files.formatting import (
|
||||||
|
aformat_multimodal_content,
|
||||||
|
format_multimodal_content,
|
||||||
|
)
|
||||||
from crewai_files.processing import (
|
from crewai_files.processing import (
|
||||||
ANTHROPIC_CONSTRAINTS,
|
ANTHROPIC_CONSTRAINTS,
|
||||||
BEDROCK_CONSTRAINTS,
|
BEDROCK_CONSTRAINTS,
|
||||||
@@ -132,10 +136,12 @@ __all__ = [
|
|||||||
"VideoExtension",
|
"VideoExtension",
|
||||||
"VideoFile",
|
"VideoFile",
|
||||||
"VideoMimeType",
|
"VideoMimeType",
|
||||||
|
"aformat_multimodal_content",
|
||||||
"cleanup_expired_files",
|
"cleanup_expired_files",
|
||||||
"cleanup_provider_files",
|
"cleanup_provider_files",
|
||||||
"cleanup_uploaded_files",
|
"cleanup_uploaded_files",
|
||||||
"create_resolver",
|
"create_resolver",
|
||||||
|
"format_multimodal_content",
|
||||||
"get_constraints_for_provider",
|
"get_constraints_for_provider",
|
||||||
"get_upload_cache",
|
"get_upload_cache",
|
||||||
"get_uploader",
|
"get_uploader",
|
||||||
@@ -143,3 +149,5 @@ __all__ = [
|
|||||||
"reset_upload_cache",
|
"reset_upload_cache",
|
||||||
"wrap_file_source",
|
"wrap_file_source",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
__version__ = "1.8.1"
|
||||||
|
|||||||
12
lib/crewai-files/src/crewai_files/formatting/__init__.py
Normal file
12
lib/crewai-files/src/crewai_files/formatting/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
"""High-level formatting API for multimodal content."""
|
||||||
|
|
||||||
|
from crewai_files.formatting.api import (
|
||||||
|
aformat_multimodal_content,
|
||||||
|
format_multimodal_content,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"aformat_multimodal_content",
|
||||||
|
"format_multimodal_content",
|
||||||
|
]
|
||||||
91
lib/crewai-files/src/crewai_files/formatting/anthropic.py
Normal file
91
lib/crewai-files/src/crewai_files/formatting/anthropic.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
"""Anthropic content block formatter."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from crewai_files.core.resolved import (
|
||||||
|
FileReference,
|
||||||
|
InlineBase64,
|
||||||
|
ResolvedFile,
|
||||||
|
UrlReference,
|
||||||
|
)
|
||||||
|
from crewai_files.core.types import FileInput
|
||||||
|
|
||||||
|
|
||||||
|
class AnthropicFormatter:
    """Build Anthropic content blocks from resolved files."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into an Anthropic content block.

        Args:
            file: Original file input; supplies the MIME type and, when no
                other resolution applies, the raw bytes to encode.
            resolved: Result of file resolution (file reference, URL
                reference, inline base64, or anything else).

        Returns:
            A ``{"type": ..., "source": ...}`` dict, or None when the MIME
            type has no matching block type.
        """
        mime = file.content_type
        kind = self._get_block_type(mime)
        if kind is None:
            return None

        source: dict[str, Any]
        if isinstance(resolved, FileReference):
            source = {"type": "file", "file_id": resolved.file_id}
        elif isinstance(resolved, UrlReference):
            source = {"type": "url", "url": resolved.url}
        elif isinstance(resolved, InlineBase64):
            source = {
                "type": "base64",
                "media_type": resolved.content_type,
                "data": resolved.data,
            }
        else:
            # Any other resolution: base64-encode the original bytes inline.
            encoded = base64.b64encode(file.read()).decode("ascii")
            source = {"type": "base64", "media_type": mime, "data": encoded}

        return {"type": kind, "source": source}

    @staticmethod
    def _get_block_type(content_type: str) -> str | None:
        """Map a MIME type to the block type used for it.

        Args:
            content_type: MIME type.

        Returns:
            ``"image"`` for any ``image/*`` type, ``"document"`` for PDF,
            otherwise None.
        """
        if content_type == "application/pdf":
            return "document"
        return "image" if content_type.startswith("image/") else None
|
||||||
277
lib/crewai-files/src/crewai_files/formatting/api.py
Normal file
277
lib/crewai-files/src/crewai_files/formatting/api.py
Normal file
@@ -0,0 +1,277 @@
|
|||||||
|
"""High-level API for formatting multimodal content."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from crewai_files.cache.upload_cache import get_upload_cache
|
||||||
|
from crewai_files.core.types import FileInput
|
||||||
|
from crewai_files.formatting.anthropic import AnthropicFormatter
|
||||||
|
from crewai_files.formatting.bedrock import BedrockFormatter
|
||||||
|
from crewai_files.formatting.gemini import GeminiFormatter
|
||||||
|
from crewai_files.formatting.openai import OpenAIFormatter
|
||||||
|
from crewai_files.processing.constraints import get_constraints_for_provider
|
||||||
|
from crewai_files.processing.processor import FileProcessor
|
||||||
|
from crewai_files.resolution.resolver import FileResolver, FileResolverConfig
|
||||||
|
from crewai_files.uploaders.factory import ProviderType
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_provider(provider: str | None) -> ProviderType:
|
||||||
|
"""Normalize provider string to ProviderType.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
provider: Raw provider string.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Normalized provider type.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If provider is None or empty.
|
||||||
|
"""
|
||||||
|
if not provider:
|
||||||
|
raise ValueError("provider is required")
|
||||||
|
|
||||||
|
provider_lower = provider.lower()
|
||||||
|
|
||||||
|
if "gemini" in provider_lower:
|
||||||
|
return "gemini"
|
||||||
|
if "google" in provider_lower:
|
||||||
|
return "google"
|
||||||
|
if "anthropic" in provider_lower:
|
||||||
|
return "anthropic"
|
||||||
|
if "claude" in provider_lower:
|
||||||
|
return "claude"
|
||||||
|
if "bedrock" in provider_lower:
|
||||||
|
return "bedrock"
|
||||||
|
if "aws" in provider_lower:
|
||||||
|
return "aws"
|
||||||
|
if "azure" in provider_lower:
|
||||||
|
return "azure"
|
||||||
|
if "gpt" in provider_lower:
|
||||||
|
return "gpt"
|
||||||
|
|
||||||
|
return "openai"
|
||||||
|
|
||||||
|
|
||||||
|
def format_multimodal_content(
    files: dict[str, FileInput],
    provider: str | None = None,
) -> list[dict[str, Any]]:
    """Convert files into provider-specific multimodal content blocks.

    The pipeline runs in three stages and returns an empty list as soon as
    a stage leaves nothing to work on:

    1. Process the files against the provider's constraints.
    2. Keep only files whose content type the provider's constraints cover.
    3. Resolve each remaining file and format it with the provider's
       formatter, dropping files the formatter declines (returns None).

    Args:
        files: Dictionary mapping file names to FileInput objects.
        provider: Provider name (e.g., "openai", "anthropic", "bedrock",
            "gemini"). Only validated when ``files`` is non-empty.

    Returns:
        List of content blocks in the provider's expected format.

    Example:
        >>> from crewai_files import format_multimodal_content, ImageFile
        >>> files = {"photo": ImageFile(source="image.jpg")}
        >>> blocks = format_multimodal_content(files, "openai")
    """
    if not files:
        return []

    provider_type = _normalize_provider(provider)

    processed = FileProcessor(constraints=provider_type).process_files(files)
    if not processed:
        return []

    allowed_prefixes = _get_supported_types(
        get_constraints_for_provider(provider_type)
    )
    supported = _filter_supported_files(processed, allowed_prefixes)
    if not supported:
        return []

    resolver = FileResolver(
        config=_get_resolver_config(provider_type),
        upload_cache=get_upload_cache(),
    )
    formatter = _get_formatter(provider_type)

    # Each file is resolved and formatted in turn; None results are skipped.
    return [
        block
        for name, item in supported.items()
        if (
            block := _format_block(
                formatter, item, resolver.resolve(item, provider_type), name
            )
        )
        is not None
    ]
|
||||||
|
|
||||||
|
|
||||||
|
async def aformat_multimodal_content(
    files: dict[str, FileInput],
    provider: str | None = None,
) -> list[dict[str, Any]]:
    """Asynchronously convert files into provider-specific content blocks.

    Mirrors format_multimodal_content, but awaits the processor's
    ``aprocess_files`` and resolves all supported files through a single
    awaited ``aresolve_files`` call before formatting.

    Args:
        files: Dictionary mapping file names to FileInput objects.
        provider: Provider name (e.g., "openai", "anthropic", "bedrock",
            "gemini"). Only validated when ``files`` is non-empty.

    Returns:
        List of content blocks in the provider's expected format.
    """
    if not files:
        return []

    provider_type = _normalize_provider(provider)

    processed = await FileProcessor(constraints=provider_type).aprocess_files(
        files
    )
    if not processed:
        return []

    allowed_prefixes = _get_supported_types(
        get_constraints_for_provider(provider_type)
    )
    supported = _filter_supported_files(processed, allowed_prefixes)
    if not supported:
        return []

    resolver = FileResolver(
        config=_get_resolver_config(provider_type),
        upload_cache=get_upload_cache(),
    )
    resolved_by_name = await resolver.aresolve_files(supported, provider_type)

    formatter = _get_formatter(provider_type)

    # Pair every resolved entry with its input by name; skip None results.
    return [
        block
        for name, resolved in resolved_by_name.items()
        if (block := _format_block(formatter, supported[name], resolved, name))
        is not None
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_supported_types(
|
||||||
|
constraints: Any | None,
|
||||||
|
) -> list[str]:
|
||||||
|
"""Get list of supported MIME type prefixes from constraints.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
constraints: Provider constraints.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of MIME type prefixes (e.g., ["image/", "application/pdf"]).
|
||||||
|
"""
|
||||||
|
if constraints is None:
|
||||||
|
return []
|
||||||
|
|
||||||
|
supported: list[str] = []
|
||||||
|
if constraints.image is not None:
|
||||||
|
supported.append("image/")
|
||||||
|
if constraints.pdf is not None:
|
||||||
|
supported.append("application/pdf")
|
||||||
|
if constraints.audio is not None:
|
||||||
|
supported.append("audio/")
|
||||||
|
if constraints.video is not None:
|
||||||
|
supported.append("video/")
|
||||||
|
return supported
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_supported_files(
|
||||||
|
files: dict[str, FileInput],
|
||||||
|
supported_types: list[str],
|
||||||
|
) -> dict[str, FileInput]:
|
||||||
|
"""Filter files to those with supported content types.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
files: All files.
|
||||||
|
supported_types: MIME type prefixes to allow.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Filtered dictionary of supported files.
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
name: f
|
||||||
|
for name, f in files.items()
|
||||||
|
if any(f.content_type.startswith(t) for t in supported_types)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_resolver_config(provider_lower: str) -> FileResolverConfig:
    """Get resolver config for provider.

    Bedrock providers (matched by "bedrock" or "aws", the same tokens
    `_get_formatter` uses to select the Bedrock formatter) get a config
    with ``use_bytes_for_bedrock=True``; uploading is preferred only when
    the ``CREWAI_BEDROCK_S3_BUCKET`` environment variable is set to a
    non-empty value. Every other provider gets ``prefer_upload=False``.

    Args:
        provider_lower: Lowercase provider name.

    Returns:
        Configured FileResolverConfig.
    """
    # "aws" is accepted alongside "bedrock" so the resolver config and the
    # formatter selection agree on which providers are Bedrock.
    if "bedrock" in provider_lower or "aws" in provider_lower:
        s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
        return FileResolverConfig(
            prefer_upload=bool(s3_bucket),
            use_bytes_for_bedrock=True,
        )

    return FileResolverConfig(prefer_upload=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_formatter(
    provider_lower: str,
) -> OpenAIFormatter | AnthropicFormatter | BedrockFormatter | GeminiFormatter:
    """Pick the formatter that matches a provider name.

    Matching is by substring and checked in this order: Anthropic
    ("anthropic"/"claude"), Bedrock ("bedrock"/"aws"), Gemini
    ("gemini"/"google"). Anything else gets the OpenAI formatter.

    Args:
        provider_lower: Lowercase provider name.

    Returns:
        Provider-specific formatter instance.
    """

    def mentions(*tokens: str) -> bool:
        return any(token in provider_lower for token in tokens)

    if mentions("anthropic", "claude"):
        return AnthropicFormatter()

    if mentions("bedrock", "aws"):
        # The bucket owner is optional; None is passed through when unset.
        owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
        return BedrockFormatter(s3_bucket_owner=owner)

    if mentions("gemini", "google"):
        return GeminiFormatter()

    return OpenAIFormatter()
|
||||||
|
|
||||||
|
|
||||||
|
def _format_block(
    formatter: OpenAIFormatter
    | AnthropicFormatter
    | BedrockFormatter
    | GeminiFormatter,
    file_input: FileInput,
    resolved: Any,
    name: str,
) -> dict[str, Any] | None:
    """Format one file with the given formatter.

    Only the Bedrock formatter's ``format_block`` is called with a ``name``
    keyword; the other formatters are called with the file and its
    resolution alone.

    Args:
        formatter: Provider formatter.
        file_input: Original file input.
        resolved: Resolved file.
        name: File name, forwarded to the Bedrock formatter only.

    Returns:
        Content block dict or None.
    """
    extra: dict[str, Any] = (
        {"name": name} if isinstance(formatter, BedrockFormatter) else {}
    )
    return formatter.format_block(file_input, resolved, **extra)
|
||||||
28
lib/crewai-files/src/crewai_files/formatting/base.py
Normal file
28
lib/crewai-files/src/crewai_files/formatting/base.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""Base formatter protocol for provider-specific content blocks."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Protocol
|
||||||
|
|
||||||
|
from crewai_files.core.resolved import ResolvedFile
|
||||||
|
from crewai_files.core.types import FileInput
|
||||||
|
|
||||||
|
|
||||||
|
class ContentFormatter(Protocol):
    """Structural interface for provider content-block formatters."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Produce a provider-specific content block for one resolved file.

        Args:
            file: Original file input with metadata.
            resolved: Resolved file (FileReference, InlineBase64, etc.).

        Returns:
            Content block dict, or None if the file type is not supported.
        """
        ...
|
||||||
188
lib/crewai-files/src/crewai_files/formatting/bedrock.py
Normal file
188
lib/crewai-files/src/crewai_files/formatting/bedrock.py
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
"""Bedrock content block formatter."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from crewai_files.core.resolved import (
|
||||||
|
FileReference,
|
||||||
|
InlineBytes,
|
||||||
|
ResolvedFile,
|
||||||
|
)
|
||||||
|
from crewai_files.core.types import FileInput
|
||||||
|
|
||||||
|
|
||||||
|
_DOCUMENT_FORMATS: dict[str, str] = {
|
||||||
|
"application/pdf": "pdf",
|
||||||
|
"text/csv": "csv",
|
||||||
|
"text/plain": "txt",
|
||||||
|
"text/markdown": "md",
|
||||||
|
"text/html": "html",
|
||||||
|
"application/msword": "doc",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
||||||
|
"application/vnd.ms-excel": "xls",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
||||||
|
}
|
||||||
|
|
||||||
|
_VIDEO_FORMATS: dict[str, str] = {
|
||||||
|
"video/mp4": "mp4",
|
||||||
|
"video/quicktime": "mov",
|
||||||
|
"video/x-matroska": "mkv",
|
||||||
|
"video/webm": "webm",
|
||||||
|
"video/x-flv": "flv",
|
||||||
|
"video/mpeg": "mpeg",
|
||||||
|
"video/3gpp": "three_gp",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BedrockFormatter:
    """Build Bedrock Converse API content blocks from resolved files."""

    def __init__(self, s3_bucket_owner: str | None = None) -> None:
        """Store the optional bucket owner.

        Args:
            s3_bucket_owner: Optional S3 bucket owner; when set it is added
                to every S3 location this formatter emits.
        """
        self.s3_bucket_owner = s3_bucket_owner

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
        name: str | None = None,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into a Bedrock content block.

        A FileReference carrying a ``file_uri`` becomes an S3-location
        block. Otherwise the block embeds bytes: the resolved data for
        InlineBytes, or the bytes read from ``file`` for anything else.

        Args:
            file: Original file input with metadata.
            resolved: Resolved file.
            name: File name, used for document blocks.

        Returns:
            Content block dict or None if not supported.
        """
        content_type = file.content_type

        if isinstance(resolved, FileReference) and resolved.file_uri:
            return self._format_s3_block(content_type, resolved.file_uri, name)

        payload = resolved.data if isinstance(resolved, InlineBytes) else file.read()
        return self._format_bytes_block(content_type, payload, name)

    def _format_s3_block(
        self,
        content_type: str,
        file_uri: str,
        name: str | None,
    ) -> dict[str, Any] | None:
        """Build a block whose source is an S3 location.

        Args:
            content_type: MIME type.
            file_uri: S3 URI.
            name: File name for documents.

        Returns:
            Content block dict or None.
        """
        location: dict[str, Any] = {"uri": file_uri}
        if self.s3_bucket_owner:
            location["bucketOwner"] = self.s3_bucket_owner
        return self._wrap_source(content_type, {"s3Location": location}, name)

    def _format_bytes_block(
        self,
        content_type: str,
        file_bytes: bytes,
        name: str | None,
    ) -> dict[str, Any] | None:
        """Build a block whose source is inline bytes.

        Args:
            content_type: MIME type.
            file_bytes: Raw file bytes.
            name: File name for documents.

        Returns:
            Content block dict or None.
        """
        return self._wrap_source(content_type, {"bytes": file_bytes}, name)

    def _wrap_source(
        self,
        content_type: str,
        source: dict[str, Any],
        name: str | None,
    ) -> dict[str, Any] | None:
        """Wrap a source dict in the image, video, or document envelope.

        Args:
            content_type: MIME type that selects the envelope.
            source: Source dict placed under the block's ``"source"`` key.
            name: File name for documents; "document" is used when falsy.

        Returns:
            Content block dict, or None when the video or document MIME
            type has no entry in the format tables.
        """
        if content_type.startswith("image/"):
            image_format = self._get_image_format(content_type)
            return {"image": {"format": image_format, "source": source}}

        if content_type.startswith("video/"):
            # An unknown video type is rejected here; it is never retried
            # as a document.
            video_format = _VIDEO_FORMATS.get(content_type)
            if not video_format:
                return None
            return {"video": {"format": video_format, "source": source}}

        doc_format = _DOCUMENT_FORMATS.get(content_type)
        if not doc_format:
            return None
        return {
            "document": {
                "name": name or "document",
                "format": doc_format,
                "source": source,
            }
        }

    @staticmethod
    def _get_image_format(content_type: str) -> str:
        """Derive the image format from a MIME type.

        Args:
            content_type: MIME type.

        Returns:
            The text after the last "/", with "jpg" rewritten to "jpeg".
        """
        subtype = content_type.rpartition("/")[2]
        return "jpeg" if subtype == "jpg" else subtype
|
||||||
66
lib/crewai-files/src/crewai_files/formatting/gemini.py
Normal file
66
lib/crewai-files/src/crewai_files/formatting/gemini.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
"""Gemini content block formatter."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from crewai_files.core.resolved import (
|
||||||
|
FileReference,
|
||||||
|
InlineBase64,
|
||||||
|
ResolvedFile,
|
||||||
|
UrlReference,
|
||||||
|
)
|
||||||
|
from crewai_files.core.types import FileInput
|
||||||
|
|
||||||
|
|
||||||
|
class GeminiFormatter:
    """Build Gemini content blocks from resolved files."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into a Gemini content block.

        A FileReference with a ``file_uri`` and a UrlReference both become
        ``fileData`` blocks; InlineBase64 becomes ``inlineData``. Any other
        resolution (including a FileReference without a URI) is read from
        ``file`` and base64-encoded inline.

        Args:
            file: Original file input with metadata.
            resolved: Resolved file.

        Returns:
            Content block dict.
        """
        mime = file.content_type

        if isinstance(resolved, FileReference) and resolved.file_uri:
            return self._file_data(resolved.content_type, resolved.file_uri)

        if isinstance(resolved, UrlReference):
            return self._file_data(mime, resolved.url)

        if isinstance(resolved, InlineBase64):
            return self._inline_data(resolved.content_type, resolved.data)

        encoded = base64.b64encode(file.read()).decode("ascii")
        return self._inline_data(mime, encoded)

    @staticmethod
    def _file_data(mime_type: str, uri: str) -> dict[str, Any]:
        """Build a ``fileData`` block pointing at a URI."""
        return {"fileData": {"mimeType": mime_type, "fileUri": uri}}

    @staticmethod
    def _inline_data(mime_type: str, data: str) -> dict[str, Any]:
        """Build an ``inlineData`` block carrying base64 text."""
        return {"inlineData": {"mimeType": mime_type, "data": data}}
|
||||||
60
lib/crewai-files/src/crewai_files/formatting/openai.py
Normal file
60
lib/crewai-files/src/crewai_files/formatting/openai.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
"""OpenAI content block formatter."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from crewai_files.core.resolved import (
|
||||||
|
FileReference,
|
||||||
|
InlineBase64,
|
||||||
|
ResolvedFile,
|
||||||
|
UrlReference,
|
||||||
|
)
|
||||||
|
from crewai_files.core.types import FileInput
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAIFormatter:
    """Build OpenAI content blocks from resolved files."""

    def format_block(
        self,
        file: FileInput,
        resolved: ResolvedFile,
    ) -> dict[str, Any] | None:
        """Turn one resolved file into an OpenAI content block.

        A FileReference becomes a ``file`` block keyed by ``file_id``.
        Every other resolution becomes an ``image_url`` block: the URL
        itself for a UrlReference, otherwise a base64 ``data:`` URL.

        NOTE(review): the ``image_url`` shape is used regardless of the
        file's content type; presumably callers only pass image files down
        these paths — confirm against the provider constraints.

        Args:
            file: Original file input with metadata.
            resolved: Resolved file.

        Returns:
            Content block dict.
        """
        mime = file.content_type

        if isinstance(resolved, FileReference):
            return {"type": "file", "file": {"file_id": resolved.file_id}}

        if isinstance(resolved, UrlReference):
            return self._image_url(resolved.url)

        if isinstance(resolved, InlineBase64):
            return self._image_url(
                f"data:{resolved.content_type};base64,{resolved.data}"
            )

        # Any other resolution: encode the original bytes into a data URL.
        encoded = base64.b64encode(file.read()).decode("ascii")
        return self._image_url(f"data:{mime};base64,{encoded}")

    @staticmethod
    def _image_url(url: str) -> dict[str, Any]:
        """Wrap a URL (remote or ``data:``) in an ``image_url`` block."""
        return {"type": "image_url", "image_url": {"url": url}}
|
||||||
@@ -10,7 +10,7 @@ from collections.abc import Callable
|
|||||||
import logging
|
import logging
|
||||||
from typing import TYPE_CHECKING, Any, Literal, cast
|
from typing import TYPE_CHECKING, Any, Literal, cast
|
||||||
|
|
||||||
from crewai_files import FileProcessor
|
from crewai_files import aformat_multimodal_content, format_multimodal_content
|
||||||
from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
|
from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError
|
||||||
from pydantic_core import CoreSchema, core_schema
|
from pydantic_core import CoreSchema, core_schema
|
||||||
|
|
||||||
@@ -220,9 +220,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
|||||||
"""Inject files as multimodal content into messages.
|
"""Inject files as multimodal content into messages.
|
||||||
|
|
||||||
For crews with input files and LLMs that support multimodal,
|
For crews with input files and LLMs that support multimodal,
|
||||||
processes files according to provider constraints and file handling mode,
|
uses crewai_files to process, resolve, and format files into
|
||||||
then delegates to the LLM's format_multimodal_content method to
|
provider-specific content blocks.
|
||||||
generate provider-specific content blocks.
|
|
||||||
"""
|
"""
|
||||||
if not self.crew or not self.task:
|
if not self.crew or not self.task:
|
||||||
return
|
return
|
||||||
@@ -235,15 +234,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
|||||||
return
|
return
|
||||||
|
|
||||||
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
||||||
processor = FileProcessor(constraints=provider)
|
content_blocks = format_multimodal_content(files, provider)
|
||||||
files = processor.process_files(files)
|
|
||||||
|
|
||||||
from crewai_files import get_upload_cache
|
|
||||||
|
|
||||||
upload_cache = get_upload_cache()
|
|
||||||
content_blocks = self.llm.format_multimodal_content(
|
|
||||||
files, upload_cache=upload_cache
|
|
||||||
)
|
|
||||||
if not content_blocks:
|
if not content_blocks:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -262,9 +254,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
|||||||
"""Async inject files as multimodal content into messages.
|
"""Async inject files as multimodal content into messages.
|
||||||
|
|
||||||
For crews with input files and LLMs that support multimodal,
|
For crews with input files and LLMs that support multimodal,
|
||||||
processes files according to provider constraints using parallel processing,
|
uses crewai_files to process, resolve, and format files into
|
||||||
then delegates to the LLM's aformat_multimodal_content method to
|
provider-specific content blocks with parallel file resolution.
|
||||||
generate provider-specific content blocks with parallel file resolution.
|
|
||||||
"""
|
"""
|
||||||
if not self.crew or not self.task:
|
if not self.crew or not self.task:
|
||||||
return
|
return
|
||||||
@@ -277,15 +268,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
|||||||
return
|
return
|
||||||
|
|
||||||
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "")
|
||||||
processor = FileProcessor(constraints=provider)
|
content_blocks = await aformat_multimodal_content(files, provider)
|
||||||
files = await processor.aprocess_files(files)
|
|
||||||
|
|
||||||
from crewai_files import get_upload_cache
|
|
||||||
|
|
||||||
upload_cache = get_upload_cache()
|
|
||||||
content_blocks = await self.llm.aformat_multimodal_content(
|
|
||||||
files, upload_cache=upload_cache
|
|
||||||
)
|
|
||||||
if not content_blocks:
|
if not content_blocks:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@@ -53,7 +53,6 @@ from crewai.utilities.logger_utils import suppress_warnings
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from crewai_files import FileInput, UploadCache
|
|
||||||
from litellm.exceptions import ContextWindowExceededError
|
from litellm.exceptions import ContextWindowExceededError
|
||||||
from litellm.litellm_core_utils.get_supported_openai_params import (
|
from litellm.litellm_core_utils.get_supported_openai_params import (
|
||||||
get_supported_openai_params,
|
get_supported_openai_params,
|
||||||
@@ -2254,66 +2253,3 @@ class LLM(BaseLLM):
|
|||||||
if "claude-3" in model_lower or "claude-4" in model_lower:
|
if "claude-3" in model_lower or "claude-4" in model_lower:
|
||||||
return ["image/", "application/pdf"]
|
return ["image/", "application/pdf"]
|
||||||
return ["image/"]
|
return ["image/"]
|
||||||
|
|
||||||
def format_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Format files as multimodal content blocks for litellm.
|
|
||||||
|
|
||||||
Uses OpenAI-compatible format which litellm translates to provider format.
|
|
||||||
Uses FileResolver for consistent base64 encoding.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache (not used by litellm but kept for interface consistency).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in OpenAI's expected format.
|
|
||||||
"""
|
|
||||||
import base64
|
|
||||||
|
|
||||||
from crewai_files import (
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBase64,
|
|
||||||
)
|
|
||||||
|
|
||||||
if not self.supports_multimodal():
|
|
||||||
return []
|
|
||||||
|
|
||||||
content_blocks: list[dict[str, Any]] = []
|
|
||||||
supported_types = self.supported_multimodal_content_types()
|
|
||||||
|
|
||||||
# LiteLLM uses OpenAI-compatible format
|
|
||||||
config = FileResolverConfig(prefer_upload=False)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
|
|
||||||
for file_input in files.values():
|
|
||||||
content_type = file_input.content_type
|
|
||||||
if not any(content_type.startswith(t) for t in supported_types):
|
|
||||||
continue
|
|
||||||
|
|
||||||
resolved = resolver.resolve(file_input, "openai")
|
|
||||||
|
|
||||||
if isinstance(resolved, InlineBase64):
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {
|
|
||||||
"url": f"data:{resolved.content_type};base64,{resolved.data}"
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Fallback to direct base64 encoding
|
|
||||||
data = base64.b64encode(file_input.read()).decode("ascii")
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {"url": f"data:{content_type};base64,{data}"},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return content_blocks
|
|
||||||
|
|||||||
@@ -32,8 +32,6 @@ from crewai.types.usage_metrics import UsageMetrics
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from crewai_files import FileInput, UploadCache
|
|
||||||
|
|
||||||
from crewai.agent.core import Agent
|
from crewai.agent.core import Agent
|
||||||
from crewai.task import Task
|
from crewai.task import Task
|
||||||
from crewai.tools.base_tool import BaseTool
|
from crewai.tools.base_tool import BaseTool
|
||||||
@@ -298,43 +296,6 @@ class BaseLLM(ABC):
|
|||||||
"""
|
"""
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def format_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Format files as multimodal content blocks for the LLM.
|
|
||||||
|
|
||||||
Subclasses should override this to provide provider-specific formatting.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache for tracking uploaded files.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in the provider's expected format.
|
|
||||||
"""
|
|
||||||
return []
|
|
||||||
|
|
||||||
async def aformat_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Async format files as multimodal content blocks for the LLM.
|
|
||||||
|
|
||||||
Default implementation calls the sync version. Subclasses should
|
|
||||||
override to use async file resolution for parallel processing.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache for tracking uploaded files.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in the provider's expected format.
|
|
||||||
"""
|
|
||||||
return self.format_multimodal_content(files, upload_cache)
|
|
||||||
|
|
||||||
def format_text_content(self, text: str) -> dict[str, Any]:
|
def format_text_content(self, text: str) -> dict[str, Any]:
|
||||||
"""Format text as a content block for the LLM.
|
"""Format text as a content block for the LLM.
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -20,12 +19,8 @@ from crewai.utilities.types import LLMMessage
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from crewai_files import FileInput, UploadCache
|
|
||||||
|
|
||||||
from crewai.llms.hooks.base import BaseInterceptor
|
from crewai.llms.hooks.base import BaseInterceptor
|
||||||
|
|
||||||
DEFAULT_CACHE_TTL = "ephemeral"
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from anthropic import Anthropic, AsyncAnthropic
|
from anthropic import Anthropic, AsyncAnthropic
|
||||||
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
|
from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock
|
||||||
@@ -1256,222 +1251,3 @@ class AnthropicCompletion(BaseLLM):
|
|||||||
if not self.supports_multimodal():
|
if not self.supports_multimodal():
|
||||||
return []
|
return []
|
||||||
return ["image/", "application/pdf"]
|
return ["image/", "application/pdf"]
|
||||||
|
|
||||||
def format_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
enable_caching: bool = True,
|
|
||||||
cache_ttl: str | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Format files as Anthropic multimodal content blocks.
|
|
||||||
|
|
||||||
Anthropic supports both base64 inline format and file references via Files API.
|
|
||||||
Uses FileResolver to determine the best delivery method based on file size.
|
|
||||||
Supports prompt caching to reduce costs and latency for repeated file usage.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache for tracking uploaded files.
|
|
||||||
enable_caching: Whether to add cache_control markers (default: True).
|
|
||||||
cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in Anthropic's expected format.
|
|
||||||
"""
|
|
||||||
if not self.supports_multimodal():
|
|
||||||
return []
|
|
||||||
|
|
||||||
from crewai_files import (
|
|
||||||
FileReference,
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBase64,
|
|
||||||
)
|
|
||||||
|
|
||||||
content_blocks: list[dict[str, Any]] = []
|
|
||||||
supported_types = self.supported_multimodal_content_types()
|
|
||||||
|
|
||||||
config = FileResolverConfig(prefer_upload=False)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
|
|
||||||
file_list = list(files.values())
|
|
||||||
num_files = len(file_list)
|
|
||||||
|
|
||||||
for i, file_input in enumerate(file_list):
|
|
||||||
content_type = file_input.content_type
|
|
||||||
if not any(content_type.startswith(t) for t in supported_types):
|
|
||||||
continue
|
|
||||||
|
|
||||||
resolved = resolver.resolve(file_input, "anthropic")
|
|
||||||
block: dict[str, Any] = {}
|
|
||||||
|
|
||||||
if isinstance(resolved, FileReference):
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
block = {
|
|
||||||
"type": "image",
|
|
||||||
"source": {
|
|
||||||
"type": "file",
|
|
||||||
"file_id": resolved.file_id,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif content_type == "application/pdf":
|
|
||||||
block = {
|
|
||||||
"type": "document",
|
|
||||||
"source": {
|
|
||||||
"type": "file",
|
|
||||||
"file_id": resolved.file_id,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif isinstance(resolved, InlineBase64):
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
block = {
|
|
||||||
"type": "image",
|
|
||||||
"source": {
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": resolved.content_type,
|
|
||||||
"data": resolved.data,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif content_type == "application/pdf":
|
|
||||||
block = {
|
|
||||||
"type": "document",
|
|
||||||
"source": {
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": resolved.content_type,
|
|
||||||
"data": resolved.data,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
data = base64.b64encode(file_input.read()).decode("ascii")
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
block = {
|
|
||||||
"type": "image",
|
|
||||||
"source": {
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": content_type,
|
|
||||||
"data": data,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif content_type == "application/pdf":
|
|
||||||
block = {
|
|
||||||
"type": "document",
|
|
||||||
"source": {
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": content_type,
|
|
||||||
"data": data,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
if block and enable_caching and i == num_files - 1:
|
|
||||||
cache_control: dict[str, str] = {"type": cache_ttl or DEFAULT_CACHE_TTL}
|
|
||||||
block["cache_control"] = cache_control
|
|
||||||
|
|
||||||
if block:
|
|
||||||
content_blocks.append(block)
|
|
||||||
|
|
||||||
return content_blocks
|
|
||||||
|
|
||||||
async def aformat_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
enable_caching: bool = True,
|
|
||||||
cache_ttl: str | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Async format files as Anthropic multimodal content blocks.
|
|
||||||
|
|
||||||
Uses parallel file resolution for improved performance with multiple files.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache for tracking uploaded files.
|
|
||||||
enable_caching: Whether to add cache_control markers (default: True).
|
|
||||||
cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in Anthropic's expected format.
|
|
||||||
"""
|
|
||||||
if not self.supports_multimodal():
|
|
||||||
return []
|
|
||||||
|
|
||||||
from crewai_files import (
|
|
||||||
FileReference,
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBase64,
|
|
||||||
)
|
|
||||||
|
|
||||||
supported_types = self.supported_multimodal_content_types()
|
|
||||||
|
|
||||||
supported_files = {
|
|
||||||
name: f
|
|
||||||
for name, f in files.items()
|
|
||||||
if any(f.content_type.startswith(t) for t in supported_types)
|
|
||||||
}
|
|
||||||
|
|
||||||
if not supported_files:
|
|
||||||
return []
|
|
||||||
|
|
||||||
config = FileResolverConfig(prefer_upload=False)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
resolved_files = await resolver.aresolve_files(supported_files, "anthropic")
|
|
||||||
|
|
||||||
content_blocks: list[dict[str, Any]] = []
|
|
||||||
num_files = len(resolved_files)
|
|
||||||
file_names = list(supported_files.keys())
|
|
||||||
|
|
||||||
for i, name in enumerate(file_names):
|
|
||||||
if name not in resolved_files:
|
|
||||||
continue
|
|
||||||
|
|
||||||
resolved = resolved_files[name]
|
|
||||||
file_input = supported_files[name]
|
|
||||||
content_type = file_input.content_type
|
|
||||||
block: dict[str, Any] = {}
|
|
||||||
|
|
||||||
if isinstance(resolved, FileReference):
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
block = {
|
|
||||||
"type": "image",
|
|
||||||
"source": {
|
|
||||||
"type": "file",
|
|
||||||
"file_id": resolved.file_id,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif content_type == "application/pdf":
|
|
||||||
block = {
|
|
||||||
"type": "document",
|
|
||||||
"source": {
|
|
||||||
"type": "file",
|
|
||||||
"file_id": resolved.file_id,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif isinstance(resolved, InlineBase64):
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
block = {
|
|
||||||
"type": "image",
|
|
||||||
"source": {
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": resolved.content_type,
|
|
||||||
"data": resolved.data,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
elif content_type == "application/pdf":
|
|
||||||
block = {
|
|
||||||
"type": "document",
|
|
||||||
"source": {
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": resolved.content_type,
|
|
||||||
"data": resolved.data,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
if block and enable_caching and i == num_files - 1:
|
|
||||||
cache_control: dict[str, str] = {"type": cache_ttl or DEFAULT_CACHE_TTL}
|
|
||||||
block["cache_control"] = cache_control
|
|
||||||
|
|
||||||
if block:
|
|
||||||
content_blocks.append(block)
|
|
||||||
|
|
||||||
return content_blocks
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -18,8 +17,6 @@ from crewai.utilities.types import LLMMessage
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from crewai_files import FileInput, UploadCache
|
|
||||||
|
|
||||||
from crewai.llms.hooks.base import BaseInterceptor
|
from crewai.llms.hooks.base import BaseInterceptor
|
||||||
|
|
||||||
|
|
||||||
@@ -1040,115 +1037,3 @@ class AzureCompletion(BaseLLM):
|
|||||||
if not self.supports_multimodal():
|
if not self.supports_multimodal():
|
||||||
return []
|
return []
|
||||||
return ["image/"]
|
return ["image/"]
|
||||||
|
|
||||||
def format_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Format files as Azure OpenAI multimodal content blocks.
|
|
||||||
|
|
||||||
Azure OpenAI uses the same image_url format as OpenAI.
|
|
||||||
Uses FileResolver for consistent base64 encoding.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache (not used by Azure but kept for interface consistency).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in Azure OpenAI's expected format.
|
|
||||||
"""
|
|
||||||
if not self.supports_multimodal():
|
|
||||||
return []
|
|
||||||
|
|
||||||
from crewai_files import (
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBase64,
|
|
||||||
)
|
|
||||||
|
|
||||||
content_blocks: list[dict[str, Any]] = []
|
|
||||||
supported_types = self.supported_multimodal_content_types()
|
|
||||||
|
|
||||||
# Azure doesn't support file uploads for images, so just use inline
|
|
||||||
config = FileResolverConfig(prefer_upload=False)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
|
|
||||||
for file_input in files.values():
|
|
||||||
content_type = file_input.content_type
|
|
||||||
if not any(content_type.startswith(t) for t in supported_types):
|
|
||||||
continue
|
|
||||||
|
|
||||||
resolved = resolver.resolve(file_input, "azure")
|
|
||||||
|
|
||||||
if isinstance(resolved, InlineBase64):
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {
|
|
||||||
"url": f"data:{resolved.content_type};base64,{resolved.data}"
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Fallback to direct base64 encoding
|
|
||||||
data = base64.b64encode(file_input.read()).decode("ascii")
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {"url": f"data:{content_type};base64,{data}"},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return content_blocks
|
|
||||||
|
|
||||||
async def aformat_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Async format files as Azure OpenAI multimodal content blocks.
|
|
||||||
|
|
||||||
Uses parallel file resolution for improved performance with multiple files.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache (not used by Azure but kept for interface consistency).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in Azure OpenAI's expected format.
|
|
||||||
"""
|
|
||||||
if not self.supports_multimodal():
|
|
||||||
return []
|
|
||||||
|
|
||||||
from crewai_files import (
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBase64,
|
|
||||||
)
|
|
||||||
|
|
||||||
supported_types = self.supported_multimodal_content_types()
|
|
||||||
|
|
||||||
supported_files = {
|
|
||||||
name: f
|
|
||||||
for name, f in files.items()
|
|
||||||
if any(f.content_type.startswith(t) for t in supported_types)
|
|
||||||
}
|
|
||||||
|
|
||||||
if not supported_files:
|
|
||||||
return []
|
|
||||||
|
|
||||||
config = FileResolverConfig(prefer_upload=False)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
resolved_files = await resolver.aresolve_files(supported_files, "azure")
|
|
||||||
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {
|
|
||||||
"url": f"data:{resolved.content_type};base64,{resolved.data}"
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for resolved in resolved_files.values()
|
|
||||||
if isinstance(resolved, InlineBase64)
|
|
||||||
]
|
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ from crewai.utilities.types import LLMMessage
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from crewai_files import FileInput, UploadCache
|
|
||||||
from mypy_boto3_bedrock_runtime.type_defs import (
|
from mypy_boto3_bedrock_runtime.type_defs import (
|
||||||
GuardrailConfigurationTypeDef,
|
GuardrailConfigurationTypeDef,
|
||||||
GuardrailStreamConfigurationTypeDef,
|
GuardrailStreamConfigurationTypeDef,
|
||||||
@@ -1563,260 +1562,3 @@ class BedrockCompletion(BaseLLM):
|
|||||||
"video/3gpp": "three_gp",
|
"video/3gpp": "three_gp",
|
||||||
}
|
}
|
||||||
return format_map.get(content_type)
|
return format_map.get(content_type)
|
||||||
|
|
||||||
def format_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Format files as Bedrock Converse API multimodal content blocks.
|
|
||||||
|
|
||||||
Bedrock Converse API supports both raw bytes and S3 URI references.
|
|
||||||
S3 uploads are only supported by Amazon Nova models.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache for S3 uploads.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in Bedrock's expected format.
|
|
||||||
"""
|
|
||||||
if not self.supports_multimodal():
|
|
||||||
return []
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from crewai_files import (
|
|
||||||
FileReference,
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBytes,
|
|
||||||
)
|
|
||||||
|
|
||||||
content_blocks: list[dict[str, Any]] = []
|
|
||||||
is_nova = self._is_nova_model()
|
|
||||||
|
|
||||||
s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
|
|
||||||
s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
|
|
||||||
prefer_upload = bool(s3_bucket) and is_nova
|
|
||||||
|
|
||||||
config = FileResolverConfig(
|
|
||||||
prefer_upload=prefer_upload, use_bytes_for_bedrock=True
|
|
||||||
)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
|
|
||||||
for name, file_input in files.items():
|
|
||||||
content_type = file_input.content_type
|
|
||||||
resolved = resolver.resolve(file_input, "bedrock")
|
|
||||||
|
|
||||||
if isinstance(resolved, FileReference) and resolved.file_uri:
|
|
||||||
s3_location: dict[str, Any] = {"uri": resolved.file_uri}
|
|
||||||
if s3_bucket_owner:
|
|
||||||
s3_location["bucketOwner"] = s3_bucket_owner
|
|
||||||
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
media_type = content_type.split("/")[-1]
|
|
||||||
if media_type == "jpg":
|
|
||||||
media_type = "jpeg"
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"image": {
|
|
||||||
"format": media_type,
|
|
||||||
"source": {"s3Location": s3_location},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
elif content_type.startswith("video/"):
|
|
||||||
video_format = self._get_video_format(content_type)
|
|
||||||
if video_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"video": {
|
|
||||||
"format": video_format,
|
|
||||||
"source": {"s3Location": s3_location},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
doc_format = self._get_document_format(content_type)
|
|
||||||
if doc_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"document": {
|
|
||||||
"name": name,
|
|
||||||
"format": doc_format,
|
|
||||||
"source": {"s3Location": s3_location},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if isinstance(resolved, InlineBytes):
|
|
||||||
file_bytes = resolved.data
|
|
||||||
else:
|
|
||||||
file_bytes = file_input.read()
|
|
||||||
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
media_type = content_type.split("/")[-1]
|
|
||||||
if media_type == "jpg":
|
|
||||||
media_type = "jpeg"
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"image": {
|
|
||||||
"format": media_type,
|
|
||||||
"source": {"bytes": file_bytes},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
elif content_type.startswith("video/"):
|
|
||||||
video_format = self._get_video_format(content_type)
|
|
||||||
if video_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"video": {
|
|
||||||
"format": video_format,
|
|
||||||
"source": {"bytes": file_bytes},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
doc_format = self._get_document_format(content_type)
|
|
||||||
if doc_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"document": {
|
|
||||||
"name": name,
|
|
||||||
"format": doc_format,
|
|
||||||
"source": {"bytes": file_bytes},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return content_blocks
|
|
||||||
|
|
||||||
async def aformat_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Async format files as Bedrock Converse API multimodal content blocks.
|
|
||||||
|
|
||||||
Uses parallel file resolution. S3 uploads are only supported by Nova models.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache for S3 uploads.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in Bedrock's expected format.
|
|
||||||
"""
|
|
||||||
if not self.supports_multimodal():
|
|
||||||
return []
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from crewai_files import (
|
|
||||||
FileReference,
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBytes,
|
|
||||||
)
|
|
||||||
|
|
||||||
is_nova = self._is_nova_model()
|
|
||||||
s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET")
|
|
||||||
s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER")
|
|
||||||
prefer_upload = bool(s3_bucket) and is_nova
|
|
||||||
|
|
||||||
config = FileResolverConfig(
|
|
||||||
prefer_upload=prefer_upload, use_bytes_for_bedrock=True
|
|
||||||
)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
resolved_files = await resolver.aresolve_files(files, "bedrock")
|
|
||||||
|
|
||||||
content_blocks: list[dict[str, Any]] = []
|
|
||||||
for name, resolved in resolved_files.items():
|
|
||||||
file_input = files[name]
|
|
||||||
content_type = file_input.content_type
|
|
||||||
|
|
||||||
if isinstance(resolved, FileReference) and resolved.file_uri:
|
|
||||||
s3_location: dict[str, Any] = {"uri": resolved.file_uri}
|
|
||||||
if s3_bucket_owner:
|
|
||||||
s3_location["bucketOwner"] = s3_bucket_owner
|
|
||||||
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
media_type = content_type.split("/")[-1]
|
|
||||||
if media_type == "jpg":
|
|
||||||
media_type = "jpeg"
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"image": {
|
|
||||||
"format": media_type,
|
|
||||||
"source": {"s3Location": s3_location},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
elif content_type.startswith("video/"):
|
|
||||||
video_format = self._get_video_format(content_type)
|
|
||||||
if video_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"video": {
|
|
||||||
"format": video_format,
|
|
||||||
"source": {"s3Location": s3_location},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
doc_format = self._get_document_format(content_type)
|
|
||||||
if doc_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"document": {
|
|
||||||
"name": name,
|
|
||||||
"format": doc_format,
|
|
||||||
"source": {"s3Location": s3_location},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if isinstance(resolved, InlineBytes):
|
|
||||||
file_bytes = resolved.data
|
|
||||||
else:
|
|
||||||
file_bytes = await file_input.aread()
|
|
||||||
|
|
||||||
if content_type.startswith("image/"):
|
|
||||||
media_type = content_type.split("/")[-1]
|
|
||||||
if media_type == "jpg":
|
|
||||||
media_type = "jpeg"
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"image": {
|
|
||||||
"format": media_type,
|
|
||||||
"source": {"bytes": file_bytes},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
elif content_type.startswith("video/"):
|
|
||||||
video_format = self._get_video_format(content_type)
|
|
||||||
if video_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"video": {
|
|
||||||
"format": video_format,
|
|
||||||
"source": {"bytes": file_bytes},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
doc_format = self._get_document_format(content_type)
|
|
||||||
if doc_format:
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"document": {
|
|
||||||
"name": name,
|
|
||||||
"format": doc_format,
|
|
||||||
"source": {"bytes": file_bytes},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return content_blocks
|
|
||||||
|
|||||||
@@ -19,11 +19,6 @@ from crewai.utilities.types import LLMMessage
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from crewai_files import (
|
|
||||||
FileInput,
|
|
||||||
UploadCache,
|
|
||||||
)
|
|
||||||
|
|
||||||
from crewai.llms.hooks.base import BaseInterceptor
|
from crewai.llms.hooks.base import BaseInterceptor
|
||||||
|
|
||||||
|
|
||||||
@@ -1097,138 +1092,6 @@ class GeminiCompletion(BaseLLM):
|
|||||||
"""
|
"""
|
||||||
return ["image/", "audio/", "video/", "application/pdf", "text/"]
|
return ["image/", "audio/", "video/", "application/pdf", "text/"]
|
||||||
|
|
||||||
def format_multimodal_content(
|
|
||||||
self,
|
|
||||||
files: dict[str, FileInput],
|
|
||||||
upload_cache: UploadCache | None = None,
|
|
||||||
) -> list[dict[str, Any]]:
|
|
||||||
"""Format files as Gemini multimodal content blocks.
|
|
||||||
|
|
||||||
Gemini supports both inlineData format and file references via File API.
|
|
||||||
Uses FileResolver to determine the best delivery method based on file size.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files: Dictionary mapping file names to FileInput objects.
|
|
||||||
upload_cache: Optional cache for tracking uploaded files.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of content blocks in Gemini's expected format.
|
|
||||||
"""
|
|
||||||
from crewai_files import (
|
|
||||||
FileReference,
|
|
||||||
FileResolver,
|
|
||||||
FileResolverConfig,
|
|
||||||
InlineBase64,
|
|
||||||
)
|
|
||||||
|
|
||||||
content_blocks: list[dict[str, Any]] = []
|
|
||||||
supported_types = self.supported_multimodal_content_types()
|
|
||||||
|
|
||||||
config = FileResolverConfig(prefer_upload=False)
|
|
||||||
resolver = FileResolver(config=config, upload_cache=upload_cache)
|
|
||||||
|
|
||||||
for file_input in files.values():
|
|
||||||
content_type = file_input.content_type
|
|
||||||
if not any(content_type.startswith(t) for t in supported_types):
|
|
||||||
continue
|
|
||||||
|
|
||||||
resolved = resolver.resolve(file_input, "gemini")
|
|
||||||
|
|
||||||
if isinstance(resolved, FileReference) and resolved.file_uri:
|
|
||||||
# Use file reference format for uploaded files
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"fileData": {
|
|
||||||
"mimeType": resolved.content_type,
|
|
||||||
"fileUri": resolved.file_uri,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
elif isinstance(resolved, InlineBase64):
|
|
||||||
# Use inline format for smaller files
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"inlineData": {
|
|
||||||
"mimeType": resolved.content_type,
|
|
||||||
"data": resolved.data,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Fallback to base64 encoding
|
|
||||||
data = base64.b64encode(file_input.read()).decode("ascii")
|
|
||||||
content_blocks.append(
|
|
||||||
{
|
|
||||||
"inlineData": {
|
|
||||||
"mimeType": content_type,
|
|
||||||
"data": data,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return content_blocks
|
|
||||||
|
|
||||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Asynchronously build Gemini content blocks for the supplied files.

    Files whose content type does not start with one of the prefixes from
    ``supported_multimodal_content_types()`` are dropped. The rest are
    resolved together through a single ``FileResolver.aresolve_files`` call
    for the ``"gemini"`` provider.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional upload cache handed to the resolver.

    Returns:
        One block per resolved file: ``fileData`` for a ``FileReference``
        with a truthy ``file_uri``, ``inlineData`` for an ``InlineBase64``.
        Any other resolution result produces no block. Empty when no file
        has a supported content type.
    """
    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    accepted_prefixes = self.supported_multimodal_content_types()

    # Keep only the files this model can take, preserving input order.
    eligible: dict[str, FileInput] = {}
    for name, candidate in files.items():
        if any(candidate.content_type.startswith(p) for p in accepted_prefixes):
            eligible[name] = candidate

    if not eligible:
        return []

    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )
    outcomes = await resolver.aresolve_files(eligible, "gemini")

    blocks: list[dict[str, Any]] = []
    for outcome in outcomes.values():
        if isinstance(outcome, FileReference) and outcome.file_uri:
            payload = {
                "mimeType": outcome.content_type,
                "fileUri": outcome.file_uri,
            }
            blocks.append({"fileData": payload})
        elif isinstance(outcome, InlineBase64):
            payload = {
                "mimeType": outcome.content_type,
                "data": outcome.data,
            }
            blocks.append({"inlineData": payload})

    return blocks
|
|
||||||
|
|
||||||
def format_text_content(self, text: str) -> dict[str, Any]:
|
def format_text_content(self, text: str) -> dict[str, Any]:
|
||||||
"""Format text as a Gemini content block.
|
"""Format text as a Gemini content block.
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -27,8 +26,6 @@ from crewai.utilities.types import LLMMessage
|
|||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from crewai_files import FileInput, UploadCache
|
|
||||||
|
|
||||||
from crewai.agent.core import Agent
|
from crewai.agent.core import Agent
|
||||||
from crewai.llms.hooks.base import BaseInterceptor
|
from crewai.llms.hooks.base import BaseInterceptor
|
||||||
from crewai.task import Task
|
from crewai.task import Task
|
||||||
@@ -1080,136 +1077,3 @@ class OpenAICompletion(BaseLLM):
|
|||||||
if not self.supports_multimodal():
|
if not self.supports_multimodal():
|
||||||
return []
|
return []
|
||||||
return ["image/"]
|
return ["image/"]
|
||||||
|
|
||||||
def format_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Build OpenAI content blocks for the supplied files.

    Each file whose content type starts with one of the prefixes from
    ``supported_multimodal_content_types()`` is resolved through a
    ``FileResolver`` for the ``"openai"`` provider; other files are skipped.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional upload cache handed to the resolver.

    Returns:
        One block per accepted file, in input order: a ``file`` block
        carrying ``file_id`` for a ``FileReference``, otherwise an
        ``image_url`` block holding a base64 data URL. Empty when
        ``supports_multimodal()`` is false.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    blocks: list[dict[str, Any]] = []
    accepted_prefixes = self.supported_multimodal_content_types()
    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )

    for candidate in files.values():
        mime = candidate.content_type
        if any(mime.startswith(prefix) for prefix in accepted_prefixes):
            outcome = resolver.resolve(candidate, "openai")

            block: dict[str, Any]
            if isinstance(outcome, FileReference):
                block = {"type": "file", "file": {"file_id": outcome.file_id}}
            else:
                if isinstance(outcome, InlineBase64):
                    url = f"data:{outcome.content_type};base64,{outcome.data}"
                else:
                    # Unrecognised resolution result: encode the raw bytes here.
                    encoded = base64.b64encode(candidate.read()).decode("ascii")
                    url = f"data:{mime};base64,{encoded}"
                block = {"type": "image_url", "image_url": {"url": url}}

            blocks.append(block)

    return blocks
|
|
||||||
|
|
||||||
async def aformat_multimodal_content(
    self,
    files: dict[str, FileInput],
    upload_cache: UploadCache | None = None,
) -> list[dict[str, Any]]:
    """Asynchronously build OpenAI content blocks for the supplied files.

    Files whose content type does not start with one of the prefixes from
    ``supported_multimodal_content_types()`` are dropped. The rest are
    resolved together through a single ``FileResolver.aresolve_files`` call
    for the ``"openai"`` provider.

    Args:
        files: Mapping of file name to FileInput object.
        upload_cache: Optional upload cache handed to the resolver.

    Returns:
        One block per resolved file: a ``file`` block carrying ``file_id``
        for a ``FileReference``, an ``image_url`` data-URL block for an
        ``InlineBase64``. Any other resolution result produces no block.
        Empty when ``supports_multimodal()`` is false or no file has a
        supported content type.
    """
    if not self.supports_multimodal():
        return []

    from crewai_files import (
        FileReference,
        FileResolver,
        FileResolverConfig,
        InlineBase64,
    )

    accepted_prefixes = self.supported_multimodal_content_types()

    # Keep only the files this model can take, preserving input order.
    eligible: dict[str, FileInput] = {}
    for name, candidate in files.items():
        if any(candidate.content_type.startswith(p) for p in accepted_prefixes):
            eligible[name] = candidate

    if not eligible:
        return []

    resolver = FileResolver(
        config=FileResolverConfig(prefer_upload=False),
        upload_cache=upload_cache,
    )
    outcomes = await resolver.aresolve_files(eligible, "openai")

    blocks: list[dict[str, Any]] = []
    for outcome in outcomes.values():
        if isinstance(outcome, FileReference):
            blocks.append(
                {"type": "file", "file": {"file_id": outcome.file_id}}
            )
        elif isinstance(outcome, InlineBase64):
            url = f"data:{outcome.content_type};base64,{outcome.data}"
            blocks.append({"type": "image_url", "image_url": {"url": url}})

    return blocks
|
|
||||||
|
|||||||
1
uv.lock
generated
1
uv.lock
generated
@@ -1345,7 +1345,6 @@ requires-dist = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crewai-files"
|
name = "crewai-files"
|
||||||
version = "0.1.0"
|
|
||||||
source = { editable = "lib/crewai-files" }
|
source = { editable = "lib/crewai-files" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aiocache" },
|
{ name = "aiocache" },
|
||||||
|
|||||||
Reference in New Issue
Block a user