diff --git a/lib/crewai-files/pyproject.toml b/lib/crewai-files/pyproject.toml index 9b3e6a8a3..5b04b3992 100644 --- a/lib/crewai-files/pyproject.toml +++ b/lib/crewai-files/pyproject.toml @@ -1,10 +1,10 @@ [project] name = "crewai-files" -version = "0.1.0" +dynamic = ["version"] description = "Add your description here" readme = "README.md" authors = [ - { name = "Greyson LaLonde", email = "greyson.r.lalonde@gmail.com" } + { name = "Greyson LaLonde", email = "greyson@crewai.com" } ] requires-python = ">=3.10, <3.14" dependencies = [ @@ -17,9 +17,9 @@ dependencies = [ "av~=13.0.0", ] -[project.scripts] -crewai-files = "crewai_files:main" - [build-system] requires = ["hatchling"] build-backend = "hatchling.build" + +[tool.hatch.version] +path = "src/crewai_files/__init__.py" diff --git a/lib/crewai-files/src/crewai_files/__init__.py b/lib/crewai-files/src/crewai_files/__init__.py index 4c874fbe0..acea89b6a 100644 --- a/lib/crewai-files/src/crewai_files/__init__.py +++ b/lib/crewai-files/src/crewai_files/__init__.py @@ -49,6 +49,10 @@ from crewai_files.core.types import ( VideoFile, VideoMimeType, ) +from crewai_files.formatting import ( + aformat_multimodal_content, + format_multimodal_content, +) from crewai_files.processing import ( ANTHROPIC_CONSTRAINTS, BEDROCK_CONSTRAINTS, @@ -132,10 +136,12 @@ __all__ = [ "VideoExtension", "VideoFile", "VideoMimeType", + "aformat_multimodal_content", "cleanup_expired_files", "cleanup_provider_files", "cleanup_uploaded_files", "create_resolver", + "format_multimodal_content", "get_constraints_for_provider", "get_upload_cache", "get_uploader", @@ -143,3 +149,5 @@ __all__ = [ "reset_upload_cache", "wrap_file_source", ] + +__version__ = "1.8.1" diff --git a/lib/crewai-files/src/crewai_files/formatting/__init__.py b/lib/crewai-files/src/crewai_files/formatting/__init__.py new file mode 100644 index 000000000..3c41bac49 --- /dev/null +++ b/lib/crewai-files/src/crewai_files/formatting/__init__.py @@ -0,0 +1,12 @@ +"""High-level formatting API for multimodal content.""" + +from crewai_files.formatting.api import ( + aformat_multimodal_content, + format_multimodal_content, +) + + +__all__ = [ + "aformat_multimodal_content", + "format_multimodal_content", +] diff --git a/lib/crewai-files/src/crewai_files/formatting/anthropic.py b/lib/crewai-files/src/crewai_files/formatting/anthropic.py new file mode 100644 index 000000000..a77e0ccee --- /dev/null +++ b/lib/crewai-files/src/crewai_files/formatting/anthropic.py @@ -0,0 +1,91 @@ +"""Anthropic content block formatter.""" + +from __future__ import annotations + +import base64 +from typing import Any + +from crewai_files.core.resolved import ( + FileReference, + InlineBase64, + ResolvedFile, + UrlReference, +) +from crewai_files.core.types import FileInput + + +class AnthropicFormatter: + """Formats resolved files into Anthropic content blocks.""" + + def format_block( + self, + file: FileInput, + resolved: ResolvedFile, + ) -> dict[str, Any] | None: + """Format a resolved file into an Anthropic content block. + + Args: + file: Original file input with metadata. + resolved: Resolved file. + + Returns: + Content block dict or None if not supported. + """ + content_type = file.content_type + block_type = self._get_block_type(content_type) + if block_type is None: + return None + + if isinstance(resolved, FileReference): + return { + "type": block_type, + "source": { + "type": "file", + "file_id": resolved.file_id, + }, + } + + if isinstance(resolved, UrlReference): + return { + "type": block_type, + "source": { + "type": "url", + "url": resolved.url, + }, + } + + if isinstance(resolved, InlineBase64): + return { + "type": block_type, + "source": { + "type": "base64", + "media_type": resolved.content_type, + "data": resolved.data, + }, + } + + data = base64.b64encode(file.read()).decode("ascii") + return { + "type": block_type, + "source": { + "type": "base64", + "media_type": content_type, + "data": data, + }, + } + + @staticmethod + def _get_block_type(content_type: str) -> str | None: + """Get Anthropic block type for content type. + + Args: + content_type: MIME type. + + Returns: + Block type string or None if not supported. + """ + if content_type.startswith("image/"): + return "image" + if content_type == "application/pdf": + return "document" + return None diff --git a/lib/crewai-files/src/crewai_files/formatting/api.py b/lib/crewai-files/src/crewai_files/formatting/api.py new file mode 100644 index 000000000..5d7fdecf8 --- /dev/null +++ b/lib/crewai-files/src/crewai_files/formatting/api.py @@ -0,0 +1,277 @@ +"""High-level API for formatting multimodal content.""" + +from __future__ import annotations + +import os +from typing import Any + +from crewai_files.cache.upload_cache import get_upload_cache +from crewai_files.core.types import FileInput +from crewai_files.formatting.anthropic import AnthropicFormatter +from crewai_files.formatting.bedrock import BedrockFormatter +from crewai_files.formatting.gemini import GeminiFormatter +from crewai_files.formatting.openai import OpenAIFormatter +from crewai_files.processing.constraints import get_constraints_for_provider +from crewai_files.processing.processor import FileProcessor +from crewai_files.resolution.resolver import FileResolver, FileResolverConfig +from crewai_files.uploaders.factory import ProviderType + + +def _normalize_provider(provider: str | None) -> ProviderType: + """Normalize provider string to ProviderType. + + Args: + provider: Raw provider string. + + Returns: + Normalized provider type. + + Raises: + ValueError: If provider is None or empty. + """ + if not provider: + raise ValueError("provider is required") + + provider_lower = provider.lower() + + if "gemini" in provider_lower: + return "gemini" + if "google" in provider_lower: + return "google" + if "anthropic" in provider_lower: + return "anthropic" + if "claude" in provider_lower: + return "claude" + if "bedrock" in provider_lower: + return "bedrock" + if "aws" in provider_lower: + return "aws" + if "azure" in provider_lower: + return "azure" + if "gpt" in provider_lower: + return "gpt" + + return "openai" + + +def format_multimodal_content( + files: dict[str, FileInput], + provider: str | None = None, +) -> list[dict[str, Any]]: + """Format files as provider-specific multimodal content blocks. + + This is the main high-level API for converting files to content blocks + suitable for sending to LLM providers. It handles: + - File processing according to provider constraints + - Resolution (upload vs inline) based on provider capabilities + - Formatting into provider-specific content block structures + + Args: + files: Dictionary mapping file names to FileInput objects. + provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini"). + + Returns: + List of content blocks in the provider's expected format. + + Example: + >>> from crewai_files import format_multimodal_content, ImageFile + >>> files = {"photo": ImageFile(source="image.jpg")} + >>> blocks = format_multimodal_content(files, "openai") + """ + if not files: + return [] + + provider_type = _normalize_provider(provider) + + processor = FileProcessor(constraints=provider_type) + processed_files = processor.process_files(files) + + if not processed_files: + return [] + + constraints = get_constraints_for_provider(provider_type) + supported_types = _get_supported_types(constraints) + supported_files = _filter_supported_files(processed_files, supported_types) + + if not supported_files: + return [] + + config = _get_resolver_config(provider_type) + upload_cache = get_upload_cache() + resolver = FileResolver(config=config, upload_cache=upload_cache) + + formatter = _get_formatter(provider_type) + content_blocks: list[dict[str, Any]] = [] + + for name, file_input in supported_files.items(): + resolved = resolver.resolve(file_input, provider_type) + block = _format_block(formatter, file_input, resolved, name) + if block is not None: + content_blocks.append(block) + + return content_blocks + + +async def aformat_multimodal_content( + files: dict[str, FileInput], + provider: str | None = None, +) -> list[dict[str, Any]]: + """Async format files as provider-specific multimodal content blocks. + + Async version of format_multimodal_content with parallel file resolution. + + Args: + files: Dictionary mapping file names to FileInput objects. + provider: Provider name (e.g., "openai", "anthropic", "bedrock", "gemini"). + + Returns: + List of content blocks in the provider's expected format. + """ + if not files: + return [] + + provider_type = _normalize_provider(provider) + + processor = FileProcessor(constraints=provider_type) + processed_files = await processor.aprocess_files(files) + + if not processed_files: + return [] + + constraints = get_constraints_for_provider(provider_type) + supported_types = _get_supported_types(constraints) + supported_files = _filter_supported_files(processed_files, supported_types) + + if not supported_files: + return [] + + config = _get_resolver_config(provider_type) + upload_cache = get_upload_cache() + resolver = FileResolver(config=config, upload_cache=upload_cache) + + resolved_files = await resolver.aresolve_files(supported_files, provider_type) + + formatter = _get_formatter(provider_type) + content_blocks: list[dict[str, Any]] = [] + + for name, resolved in resolved_files.items(): + file_input = supported_files[name] + block = _format_block(formatter, file_input, resolved, name) + if block is not None: + content_blocks.append(block) + + return content_blocks + + +def _get_supported_types( + constraints: Any | None, +) -> list[str]: + """Get list of supported MIME type prefixes from constraints. + + Args: + constraints: Provider constraints. + + Returns: + List of MIME type prefixes (e.g., ["image/", "application/pdf"]). + """ + if constraints is None: + return [] + + supported: list[str] = [] + if constraints.image is not None: + supported.append("image/") + if constraints.pdf is not None: + supported.append("application/pdf") + if constraints.audio is not None: + supported.append("audio/") + if constraints.video is not None: + supported.append("video/") + return supported + + +def _filter_supported_files( + files: dict[str, FileInput], + supported_types: list[str], +) -> dict[str, FileInput]: + """Filter files to those with supported content types. + + Args: + files: All files. + supported_types: MIME type prefixes to allow. + + Returns: + Filtered dictionary of supported files. + """ + return { + name: f + for name, f in files.items() + if any(f.content_type.startswith(t) for t in supported_types) + } + + +def _get_resolver_config(provider_lower: str) -> FileResolverConfig: + """Get resolver config for provider. + + Args: + provider_lower: Lowercase provider name. + + Returns: + Configured FileResolverConfig. + """ + if "bedrock" in provider_lower: + s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET") + prefer_upload = bool(s3_bucket) + return FileResolverConfig( + prefer_upload=prefer_upload, use_bytes_for_bedrock=True + ) + + return FileResolverConfig(prefer_upload=False) + + +def _get_formatter( + provider_lower: str, +) -> OpenAIFormatter | AnthropicFormatter | BedrockFormatter | GeminiFormatter: + """Get formatter for provider. + + Args: + provider_lower: Lowercase provider name. + + Returns: + Provider-specific formatter instance. + """ + if "anthropic" in provider_lower or "claude" in provider_lower: + return AnthropicFormatter() + + if "bedrock" in provider_lower or "aws" in provider_lower: + s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER") + return BedrockFormatter(s3_bucket_owner=s3_bucket_owner) + + if "gemini" in provider_lower or "google" in provider_lower: + return GeminiFormatter() + + return OpenAIFormatter() + + +def _format_block( + formatter: OpenAIFormatter + | AnthropicFormatter + | BedrockFormatter + | GeminiFormatter, + file_input: FileInput, + resolved: Any, + name: str, +) -> dict[str, Any] | None: + """Format a single file block using the appropriate formatter. + + Args: + formatter: Provider formatter. + file_input: Original file input. + resolved: Resolved file. + name: File name. + + Returns: + Content block dict or None. + """ + if isinstance(formatter, BedrockFormatter): + return formatter.format_block(file_input, resolved, name=name) + return formatter.format_block(file_input, resolved) diff --git a/lib/crewai-files/src/crewai_files/formatting/base.py b/lib/crewai-files/src/crewai_files/formatting/base.py new file mode 100644 index 000000000..a32b6fdb2 --- /dev/null +++ b/lib/crewai-files/src/crewai_files/formatting/base.py @@ -0,0 +1,28 @@ +"""Base formatter protocol for provider-specific content blocks.""" + +from __future__ import annotations + +from typing import Any, Protocol + +from crewai_files.core.resolved import ResolvedFile +from crewai_files.core.types import FileInput + + +class ContentFormatter(Protocol): + """Protocol for formatting resolved files into provider content blocks.""" + + def format_block( + self, + file: FileInput, + resolved: ResolvedFile, + ) -> dict[str, Any] | None: + """Format a resolved file into a provider-specific content block. + + Args: + file: Original file input with metadata. + resolved: Resolved file (FileReference, InlineBase64, etc.). + + Returns: + Content block dict or None if file type not supported. + """ + ... diff --git a/lib/crewai-files/src/crewai_files/formatting/bedrock.py b/lib/crewai-files/src/crewai_files/formatting/bedrock.py new file mode 100644 index 000000000..c838409a9 --- /dev/null +++ b/lib/crewai-files/src/crewai_files/formatting/bedrock.py @@ -0,0 +1,188 @@ +"""Bedrock content block formatter.""" + +from __future__ import annotations + +from typing import Any + +from crewai_files.core.resolved import ( + FileReference, + InlineBytes, + ResolvedFile, +) +from crewai_files.core.types import FileInput + + +_DOCUMENT_FORMATS: dict[str, str] = { + "application/pdf": "pdf", + "text/csv": "csv", + "text/plain": "txt", + "text/markdown": "md", + "text/html": "html", + "application/msword": "doc", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx", + "application/vnd.ms-excel": "xls", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx", +} + +_VIDEO_FORMATS: dict[str, str] = { + "video/mp4": "mp4", + "video/quicktime": "mov", + "video/x-matroska": "mkv", + "video/webm": "webm", + "video/x-flv": "flv", + "video/mpeg": "mpeg", + "video/3gpp": "three_gp", +} + + +class BedrockFormatter: + """Formats resolved files into Bedrock Converse API content blocks.""" + + def __init__(self, s3_bucket_owner: str | None = None) -> None: + """Initialize formatter. + + Args: + s3_bucket_owner: Optional S3 bucket owner for file references. + """ + self.s3_bucket_owner = s3_bucket_owner + + def format_block( + self, + file: FileInput, + resolved: ResolvedFile, + name: str | None = None, + ) -> dict[str, Any] | None: + """Format a resolved file into a Bedrock content block. + + Args: + file: Original file input with metadata. + resolved: Resolved file. + name: File name (required for document blocks). + + Returns: + Content block dict or None if not supported. + """ + content_type = file.content_type + + if isinstance(resolved, FileReference) and resolved.file_uri: + return self._format_s3_block(content_type, resolved.file_uri, name) + + if isinstance(resolved, InlineBytes): + file_bytes = resolved.data + else: + file_bytes = file.read() + + return self._format_bytes_block(content_type, file_bytes, name) + + def _format_s3_block( + self, + content_type: str, + file_uri: str, + name: str | None, + ) -> dict[str, Any] | None: + """Format block with S3 location source. + + Args: + content_type: MIME type. + file_uri: S3 URI. + name: File name for documents. + + Returns: + Content block dict or None. + """ + s3_location: dict[str, Any] = {"uri": file_uri} + if self.s3_bucket_owner: + s3_location["bucketOwner"] = self.s3_bucket_owner + + if content_type.startswith("image/"): + return { + "image": { + "format": self._get_image_format(content_type), + "source": {"s3Location": s3_location}, + } + } + + if content_type.startswith("video/"): + video_format = _VIDEO_FORMATS.get(content_type) + if video_format: + return { + "video": { + "format": video_format, + "source": {"s3Location": s3_location}, + } + } + return None + + doc_format = _DOCUMENT_FORMATS.get(content_type) + if doc_format: + return { + "document": { + "name": name or "document", + "format": doc_format, + "source": {"s3Location": s3_location}, + } + } + + return None + + def _format_bytes_block( + self, + content_type: str, + file_bytes: bytes, + name: str | None, + ) -> dict[str, Any] | None: + """Format block with inline bytes source. + + Args: + content_type: MIME type. + file_bytes: Raw file bytes. + name: File name for documents. + + Returns: + Content block dict or None. + """ + if content_type.startswith("image/"): + return { + "image": { + "format": self._get_image_format(content_type), + "source": {"bytes": file_bytes}, + } + } + + if content_type.startswith("video/"): + video_format = _VIDEO_FORMATS.get(content_type) + if video_format: + return { + "video": { + "format": video_format, + "source": {"bytes": file_bytes}, + } + } + return None + + doc_format = _DOCUMENT_FORMATS.get(content_type) + if doc_format: + return { + "document": { + "name": name or "document", + "format": doc_format, + "source": {"bytes": file_bytes}, + } + } + + return None + + @staticmethod + def _get_image_format(content_type: str) -> str: + """Get Bedrock image format from content type. + + Args: + content_type: MIME type. + + Returns: + Format string for Bedrock. + """ + media_type = content_type.split("/")[-1] + if media_type == "jpg": + return "jpeg" + return media_type diff --git a/lib/crewai-files/src/crewai_files/formatting/gemini.py b/lib/crewai-files/src/crewai_files/formatting/gemini.py new file mode 100644 index 000000000..3bb09dbab --- /dev/null +++ b/lib/crewai-files/src/crewai_files/formatting/gemini.py @@ -0,0 +1,66 @@ +"""Gemini content block formatter.""" + +from __future__ import annotations + +import base64 +from typing import Any + +from crewai_files.core.resolved import ( + FileReference, + InlineBase64, + ResolvedFile, + UrlReference, +) +from crewai_files.core.types import FileInput + + +class GeminiFormatter: + """Formats resolved files into Gemini content blocks.""" + + def format_block( + self, + file: FileInput, + resolved: ResolvedFile, + ) -> dict[str, Any] | None: + """Format a resolved file into a Gemini content block. + + Args: + file: Original file input with metadata. + resolved: Resolved file. + + Returns: + Content block dict or None if not supported. + """ + content_type = file.content_type + + if isinstance(resolved, FileReference) and resolved.file_uri: + return { + "fileData": { + "mimeType": resolved.content_type, + "fileUri": resolved.file_uri, + } + } + + if isinstance(resolved, UrlReference): + return { + "fileData": { + "mimeType": content_type, + "fileUri": resolved.url, + } + } + + if isinstance(resolved, InlineBase64): + return { + "inlineData": { + "mimeType": resolved.content_type, + "data": resolved.data, + } + } + + data = base64.b64encode(file.read()).decode("ascii") + return { + "inlineData": { + "mimeType": content_type, + "data": data, + } + } diff --git a/lib/crewai-files/src/crewai_files/formatting/openai.py b/lib/crewai-files/src/crewai_files/formatting/openai.py new file mode 100644 index 000000000..9e9fb8982 --- /dev/null +++ b/lib/crewai-files/src/crewai_files/formatting/openai.py @@ -0,0 +1,60 @@ +"""OpenAI content block formatter.""" + +from __future__ import annotations + +import base64 +from typing import Any + +from crewai_files.core.resolved import ( + FileReference, + InlineBase64, + ResolvedFile, + UrlReference, +) +from crewai_files.core.types import FileInput + + +class OpenAIFormatter: + """Formats resolved files into OpenAI content blocks.""" + + def format_block( + self, + file: FileInput, + resolved: ResolvedFile, + ) -> dict[str, Any] | None: + """Format a resolved file into an OpenAI content block. + + Args: + file: Original file input with metadata. + resolved: Resolved file. + + Returns: + Content block dict or None if not supported. + """ + content_type = file.content_type + + if isinstance(resolved, FileReference): + return { + "type": "file", + "file": {"file_id": resolved.file_id}, + } + + if isinstance(resolved, UrlReference): + return { + "type": "image_url", + "image_url": {"url": resolved.url}, + } + + if isinstance(resolved, InlineBase64): + return { + "type": "image_url", + "image_url": { + "url": f"data:{resolved.content_type};base64,{resolved.data}" + }, + } + + data = base64.b64encode(file.read()).decode("ascii") + return { + "type": "image_url", + "image_url": {"url": f"data:{content_type};base64,{data}"}, + } diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index d72026abe..20df6d9dd 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -10,7 +10,7 @@ from collections.abc import Callable import logging from typing import TYPE_CHECKING, Any, Literal, cast -from crewai_files import FileProcessor +from crewai_files import aformat_multimodal_content, format_multimodal_content from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError from pydantic_core import CoreSchema, core_schema @@ -220,9 +220,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): """Inject files as multimodal content into messages. For crews with input files and LLMs that support multimodal, - processes files according to provider constraints and file handling mode, - then delegates to the LLM's format_multimodal_content method to - generate provider-specific content blocks. + uses crewai_files to process, resolve, and format files into + provider-specific content blocks. """ if not self.crew or not self.task: return @@ -235,15 +234,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): return provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "") - processor = FileProcessor(constraints=provider) - files = processor.process_files(files) + content_blocks = format_multimodal_content(files, provider) - from crewai_files import get_upload_cache - - upload_cache = get_upload_cache() - content_blocks = self.llm.format_multimodal_content( - files, upload_cache=upload_cache - ) if not content_blocks: return @@ -262,9 +254,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): """Async inject files as multimodal content into messages. For crews with input files and LLMs that support multimodal, - processes files according to provider constraints using parallel processing, - then delegates to the LLM's aformat_multimodal_content method to - generate provider-specific content blocks with parallel file resolution. + uses crewai_files to process, resolve, and format files into + provider-specific content blocks with parallel file resolution. """ if not self.crew or not self.task: return @@ -277,15 +268,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): return provider = getattr(self.llm, "provider", None) or getattr(self.llm, "model", "") - processor = FileProcessor(constraints=provider) - files = await processor.aprocess_files(files) + content_blocks = await aformat_multimodal_content(files, provider) - from crewai_files import get_upload_cache - - upload_cache = get_upload_cache() - content_blocks = await self.llm.aformat_multimodal_content( - files, upload_cache=upload_cache - ) if not content_blocks: return diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index a17a8c08b..09f2276b4 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -53,7 +53,6 @@ from crewai.utilities.logger_utils import suppress_warnings if TYPE_CHECKING: - from crewai_files import FileInput, UploadCache from litellm.exceptions import ContextWindowExceededError from litellm.litellm_core_utils.get_supported_openai_params import ( get_supported_openai_params, @@ -2254,66 +2253,3 @@ class LLM(BaseLLM): if "claude-3" in model_lower or "claude-4" in model_lower: return ["image/", "application/pdf"] return ["image/"] - - def format_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Format files as multimodal content blocks for litellm. - - Uses OpenAI-compatible format which litellm translates to provider format. - Uses FileResolver for consistent base64 encoding. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache (not used by litellm but kept for interface consistency). - - Returns: - List of content blocks in OpenAI's expected format. - """ - import base64 - - from crewai_files import ( - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - if not self.supports_multimodal(): - return [] - - content_blocks: list[dict[str, Any]] = [] - supported_types = self.supported_multimodal_content_types() - - # LiteLLM uses OpenAI-compatible format - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - - for file_input in files.values(): - content_type = file_input.content_type - if not any(content_type.startswith(t) for t in supported_types): - continue - - resolved = resolver.resolve(file_input, "openai") - - if isinstance(resolved, InlineBase64): - content_blocks.append( - { - "type": "image_url", - "image_url": { - "url": f"data:{resolved.content_type};base64,{resolved.data}" - }, - } - ) - else: - # Fallback to direct base64 encoding - data = base64.b64encode(file_input.read()).decode("ascii") - content_blocks.append( - { - "type": "image_url", - "image_url": {"url": f"data:{content_type};base64,{data}"}, - } - ) - - return content_blocks diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py index 30be5fab1..a55b06998 100644 --- a/lib/crewai/src/crewai/llms/base_llm.py +++ b/lib/crewai/src/crewai/llms/base_llm.py @@ -32,8 +32,6 @@ from crewai.types.usage_metrics import UsageMetrics if TYPE_CHECKING: - from crewai_files import FileInput, UploadCache - from crewai.agent.core import Agent from crewai.task import Task from crewai.tools.base_tool import BaseTool @@ -298,43 +296,6 @@ class BaseLLM(ABC): """ return [] - def format_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Format files as multimodal content blocks for the LLM. - - Subclasses should override this to provide provider-specific formatting. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - - Returns: - List of content blocks in the provider's expected format. - """ - return [] - - async def aformat_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Async format files as multimodal content blocks for the LLM. - - Default implementation calls the sync version. Subclasses should - override to use async file resolution for parallel processing. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - - Returns: - List of content blocks in the provider's expected format. - """ - return self.format_multimodal_content(files, upload_cache) - def format_text_content(self, text: str) -> dict[str, Any]: """Format text as a content block for the LLM. diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 6b0bb82cc..1b9dfac4d 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -1,6 +1,5 @@ from __future__ import annotations -import base64 import json import logging import os @@ -20,12 +19,8 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai_files import FileInput, UploadCache - from crewai.llms.hooks.base import BaseInterceptor -DEFAULT_CACHE_TTL = "ephemeral" - try: from anthropic import Anthropic, AsyncAnthropic from anthropic.types import Message, TextBlock, ThinkingBlock, ToolUseBlock @@ -1256,222 +1251,3 @@ class AnthropicCompletion(BaseLLM): if not self.supports_multimodal(): return [] return ["image/", "application/pdf"] - - def format_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - enable_caching: bool = True, - cache_ttl: str | None = None, - ) -> list[dict[str, Any]]: - """Format files as Anthropic multimodal content blocks. - - Anthropic supports both base64 inline format and file references via Files API. - Uses FileResolver to determine the best delivery method based on file size. - Supports prompt caching to reduce costs and latency for repeated file usage. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - enable_caching: Whether to add cache_control markers (default: True). - cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models). - - Returns: - List of content blocks in Anthropic's expected format. - """ - if not self.supports_multimodal(): - return [] - - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - content_blocks: list[dict[str, Any]] = [] - supported_types = self.supported_multimodal_content_types() - - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - - file_list = list(files.values()) - num_files = len(file_list) - - for i, file_input in enumerate(file_list): - content_type = file_input.content_type - if not any(content_type.startswith(t) for t in supported_types): - continue - - resolved = resolver.resolve(file_input, "anthropic") - block: dict[str, Any] = {} - - if isinstance(resolved, FileReference): - if content_type.startswith("image/"): - block = { - "type": "image", - "source": { - "type": "file", - "file_id": resolved.file_id, - }, - } - elif content_type == "application/pdf": - block = { - "type": "document", - "source": { - "type": "file", - "file_id": resolved.file_id, - }, - } - elif isinstance(resolved, InlineBase64): - if content_type.startswith("image/"): - block = { - "type": "image", - "source": { - "type": "base64", - "media_type": resolved.content_type, - "data": resolved.data, - }, - } - elif content_type == "application/pdf": - block = { - "type": "document", - "source": { - "type": "base64", - "media_type": resolved.content_type, - "data": resolved.data, - }, - } - else: - data = base64.b64encode(file_input.read()).decode("ascii") - if content_type.startswith("image/"): - block = { - "type": "image", - "source": { - "type": "base64", - "media_type": content_type, - "data": data, - }, - } - elif content_type == "application/pdf": - block = { - "type": "document", - "source": { - "type": "base64", - "media_type": content_type, - "data": data, - }, - } - - if block and enable_caching and i == num_files - 1: - cache_control: dict[str, str] = {"type": cache_ttl or DEFAULT_CACHE_TTL} - block["cache_control"] = cache_control - - if block: - content_blocks.append(block) - - return content_blocks - - async def aformat_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - enable_caching: bool = True, - cache_ttl: str | None = None, - ) -> list[dict[str, Any]]: - """Async format files as Anthropic multimodal content blocks. - - Uses parallel file resolution for improved performance with multiple files. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - enable_caching: Whether to add cache_control markers (default: True). - cache_ttl: Cache TTL - "ephemeral" (5min) or "1h" (1hr for supported models). - - Returns: - List of content blocks in Anthropic's expected format. - """ - if not self.supports_multimodal(): - return [] - - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - supported_types = self.supported_multimodal_content_types() - - supported_files = { - name: f - for name, f in files.items() - if any(f.content_type.startswith(t) for t in supported_types) - } - - if not supported_files: - return [] - - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - resolved_files = await resolver.aresolve_files(supported_files, "anthropic") - - content_blocks: list[dict[str, Any]] = [] - num_files = len(resolved_files) - file_names = list(supported_files.keys()) - - for i, name in enumerate(file_names): - if name not in resolved_files: - continue - - resolved = resolved_files[name] - file_input = supported_files[name] - content_type = file_input.content_type - block: dict[str, Any] = {} - - if isinstance(resolved, FileReference): - if content_type.startswith("image/"): - block = { - "type": "image", - "source": { - "type": "file", - "file_id": resolved.file_id, - }, - } - elif content_type == "application/pdf": - block = { - "type": "document", - "source": { - "type": "file", - "file_id": resolved.file_id, - }, - } - elif isinstance(resolved, InlineBase64): - if content_type.startswith("image/"): - block = { - "type": "image", - "source": { - "type": "base64", - "media_type": resolved.content_type, - "data": resolved.data, - }, - } - elif content_type == "application/pdf": - block = { - "type": "document", - "source": { - "type": "base64", - "media_type": resolved.content_type, - "data": resolved.data, - }, - } - - if block and enable_caching and i == num_files - 1: - cache_control: dict[str, str] = {"type": cache_ttl or DEFAULT_CACHE_TTL} - block["cache_control"] = cache_control - - if block: - content_blocks.append(block) - - return content_blocks diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 52dfee3eb..6b3116af2 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -1,6 +1,5 @@ from __future__ import annotations -import base64 import json import logging import os @@ -18,8 +17,6 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai_files import FileInput, UploadCache - from crewai.llms.hooks.base import BaseInterceptor @@ -1040,115 +1037,3 @@ class AzureCompletion(BaseLLM): if not self.supports_multimodal(): return [] return ["image/"] - - def format_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Format files as Azure OpenAI multimodal content blocks. - - Azure OpenAI uses the same image_url format as OpenAI. - Uses FileResolver for consistent base64 encoding. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache (not used by Azure but kept for interface consistency). - - Returns: - List of content blocks in Azure OpenAI's expected format. - """ - if not self.supports_multimodal(): - return [] - - from crewai_files import ( - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - content_blocks: list[dict[str, Any]] = [] - supported_types = self.supported_multimodal_content_types() - - # Azure doesn't support file uploads for images, so just use inline - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - - for file_input in files.values(): - content_type = file_input.content_type - if not any(content_type.startswith(t) for t in supported_types): - continue - - resolved = resolver.resolve(file_input, "azure") - - if isinstance(resolved, InlineBase64): - content_blocks.append( - { - "type": "image_url", - "image_url": { - "url": f"data:{resolved.content_type};base64,{resolved.data}" - }, - } - ) - else: - # Fallback to direct base64 encoding - data = base64.b64encode(file_input.read()).decode("ascii") - content_blocks.append( - { - "type": "image_url", - "image_url": {"url": f"data:{content_type};base64,{data}"}, - } - ) - - return content_blocks - - async def aformat_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Async format files as Azure OpenAI multimodal content blocks. - - Uses parallel file resolution for improved performance with multiple files. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache (not used by Azure but kept for interface consistency). - - Returns: - List of content blocks in Azure OpenAI's expected format. - """ - if not self.supports_multimodal(): - return [] - - from crewai_files import ( - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - supported_types = self.supported_multimodal_content_types() - - supported_files = { - name: f - for name, f in files.items() - if any(f.content_type.startswith(t) for t in supported_types) - } - - if not supported_files: - return [] - - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - resolved_files = await resolver.aresolve_files(supported_files, "azure") - - return [ - { - "type": "image_url", - "image_url": { - "url": f"data:{resolved.content_type};base64,{resolved.data}" - }, - } - for resolved in resolved_files.values() - if isinstance(resolved, InlineBase64) - ] diff --git a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py index 22ce0ed13..d15e876a2 100644 --- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py +++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py @@ -20,7 +20,6 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai_files import FileInput, UploadCache from mypy_boto3_bedrock_runtime.type_defs import ( GuardrailConfigurationTypeDef, GuardrailStreamConfigurationTypeDef, @@ -1563,260 +1562,3 @@ class BedrockCompletion(BaseLLM): "video/3gpp": "three_gp", } return format_map.get(content_type) - - def format_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Format files as Bedrock Converse API multimodal content blocks. - - Bedrock Converse API supports both raw bytes and S3 URI references. - S3 uploads are only supported by Amazon Nova models. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for S3 uploads. - - Returns: - List of content blocks in Bedrock's expected format. - """ - if not self.supports_multimodal(): - return [] - - import os - - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBytes, - ) - - content_blocks: list[dict[str, Any]] = [] - is_nova = self._is_nova_model() - - s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET") - s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER") - prefer_upload = bool(s3_bucket) and is_nova - - config = FileResolverConfig( - prefer_upload=prefer_upload, use_bytes_for_bedrock=True - ) - resolver = FileResolver(config=config, upload_cache=upload_cache) - - for name, file_input in files.items(): - content_type = file_input.content_type - resolved = resolver.resolve(file_input, "bedrock") - - if isinstance(resolved, FileReference) and resolved.file_uri: - s3_location: dict[str, Any] = {"uri": resolved.file_uri} - if s3_bucket_owner: - s3_location["bucketOwner"] = s3_bucket_owner - - if content_type.startswith("image/"): - media_type = content_type.split("/")[-1] - if media_type == "jpg": - media_type = "jpeg" - content_blocks.append( - { - "image": { - "format": media_type, - "source": {"s3Location": s3_location}, - } - } - ) - elif content_type.startswith("video/"): - video_format = self._get_video_format(content_type) - if video_format: - content_blocks.append( - { - "video": { - "format": video_format, - "source": {"s3Location": s3_location}, - } - } - ) - else: - doc_format = self._get_document_format(content_type) - if doc_format: - content_blocks.append( - { - "document": { - "name": name, - "format": doc_format, - "source": {"s3Location": s3_location}, - } - } - ) - else: - if isinstance(resolved, InlineBytes): - file_bytes = resolved.data - else: - file_bytes = file_input.read() - - if content_type.startswith("image/"): - media_type = content_type.split("/")[-1] - if media_type == "jpg": - media_type = "jpeg" - content_blocks.append( - { - "image": { - "format": media_type, - "source": {"bytes": file_bytes}, - } - } - ) - elif content_type.startswith("video/"): - video_format = self._get_video_format(content_type) - if video_format: - content_blocks.append( - { - "video": { - "format": video_format, - "source": {"bytes": file_bytes}, - } - } - ) - else: - doc_format = self._get_document_format(content_type) - if doc_format: - content_blocks.append( - { - "document": { - "name": name, - "format": doc_format, - "source": {"bytes": file_bytes}, - } - } - ) - - return content_blocks - - async def aformat_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Async format files as Bedrock Converse API multimodal content blocks. - - Uses parallel file resolution. S3 uploads are only supported by Nova models. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for S3 uploads. - - Returns: - List of content blocks in Bedrock's expected format. - """ - if not self.supports_multimodal(): - return [] - - import os - - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBytes, - ) - - is_nova = self._is_nova_model() - s3_bucket = os.environ.get("CREWAI_BEDROCK_S3_BUCKET") - s3_bucket_owner = os.environ.get("CREWAI_BEDROCK_S3_BUCKET_OWNER") - prefer_upload = bool(s3_bucket) and is_nova - - config = FileResolverConfig( - prefer_upload=prefer_upload, use_bytes_for_bedrock=True - ) - resolver = FileResolver(config=config, upload_cache=upload_cache) - resolved_files = await resolver.aresolve_files(files, "bedrock") - - content_blocks: list[dict[str, Any]] = [] - for name, resolved in resolved_files.items(): - file_input = files[name] - content_type = file_input.content_type - - if isinstance(resolved, FileReference) and resolved.file_uri: - s3_location: dict[str, Any] = {"uri": resolved.file_uri} - if s3_bucket_owner: - s3_location["bucketOwner"] = s3_bucket_owner - - if content_type.startswith("image/"): - media_type = content_type.split("/")[-1] - if media_type == "jpg": - media_type = "jpeg" - content_blocks.append( - { - "image": { - "format": media_type, - "source": {"s3Location": s3_location}, - } - } - ) - elif content_type.startswith("video/"): - video_format = self._get_video_format(content_type) - if video_format: - content_blocks.append( - { - "video": { - "format": video_format, - "source": {"s3Location": s3_location}, - } - } - ) - else: - doc_format = self._get_document_format(content_type) - if doc_format: - content_blocks.append( - { - "document": { - "name": name, - "format": doc_format, - "source": {"s3Location": s3_location}, - } - } - ) - else: - if isinstance(resolved, InlineBytes): - file_bytes = resolved.data - else: - file_bytes = await file_input.aread() - - if content_type.startswith("image/"): - media_type = content_type.split("/")[-1] - if media_type == "jpg": - media_type = "jpeg" - content_blocks.append( - { - "image": { - "format": media_type, - "source": {"bytes": file_bytes}, - } - } - ) - elif content_type.startswith("video/"): - video_format = self._get_video_format(content_type) - if video_format: - content_blocks.append( - { - "video": { - "format": video_format, - "source": {"bytes": file_bytes}, - } - } - ) - else: - doc_format = self._get_document_format(content_type) - if doc_format: - content_blocks.append( - { - "document": { - "name": name, - "format": doc_format, - "source": {"bytes": file_bytes}, - } - } - ) - - return content_blocks diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index 97ef57315..1fa078b30 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -19,11 +19,6 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai_files import ( - FileInput, - UploadCache, - ) - from crewai.llms.hooks.base import BaseInterceptor @@ -1097,138 +1092,6 @@ class GeminiCompletion(BaseLLM): """ return ["image/", "audio/", "video/", "application/pdf", "text/"] - def format_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Format files as Gemini multimodal content blocks. - - Gemini supports both inlineData format and file references via File API. - Uses FileResolver to determine the best delivery method based on file size. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - - Returns: - List of content blocks in Gemini's expected format. - """ - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - content_blocks: list[dict[str, Any]] = [] - supported_types = self.supported_multimodal_content_types() - - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - - for file_input in files.values(): - content_type = file_input.content_type - if not any(content_type.startswith(t) for t in supported_types): - continue - - resolved = resolver.resolve(file_input, "gemini") - - if isinstance(resolved, FileReference) and resolved.file_uri: - # Use file reference format for uploaded files - content_blocks.append( - { - "fileData": { - "mimeType": resolved.content_type, - "fileUri": resolved.file_uri, - } - } - ) - elif isinstance(resolved, InlineBase64): - # Use inline format for smaller files - content_blocks.append( - { - "inlineData": { - "mimeType": resolved.content_type, - "data": resolved.data, - } - } - ) - else: - # Fallback to base64 encoding - data = base64.b64encode(file_input.read()).decode("ascii") - content_blocks.append( - { - "inlineData": { - "mimeType": content_type, - "data": data, - } - } - ) - - return content_blocks - - async def aformat_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Async format files as Gemini multimodal content blocks. - - Uses parallel file resolution for improved performance with multiple files. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - - Returns: - List of content blocks in Gemini's expected format. - """ - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - supported_types = self.supported_multimodal_content_types() - - supported_files = { - name: f - for name, f in files.items() - if any(f.content_type.startswith(t) for t in supported_types) - } - - if not supported_files: - return [] - - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - resolved_files = await resolver.aresolve_files(supported_files, "gemini") - - content_blocks: list[dict[str, Any]] = [] - for resolved in resolved_files.values(): - if isinstance(resolved, FileReference) and resolved.file_uri: - content_blocks.append( - { - "fileData": { - "mimeType": resolved.content_type, - "fileUri": resolved.file_uri, - } - } - ) - elif isinstance(resolved, InlineBase64): - content_blocks.append( - { - "inlineData": { - "mimeType": resolved.content_type, - "data": resolved.data, - } - } - ) - - return content_blocks - def format_text_content(self, text: str) -> dict[str, Any]: """Format text as a Gemini content block. diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py index 8b9e9a91a..9271c69df 100644 --- a/lib/crewai/src/crewai/llms/providers/openai/completion.py +++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py @@ -1,6 +1,5 @@ from __future__ import annotations -import base64 from collections.abc import AsyncIterator import json import logging @@ -27,8 +26,6 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai_files import FileInput, UploadCache - from crewai.agent.core import Agent from crewai.llms.hooks.base import BaseInterceptor from crewai.task import Task @@ -1080,136 +1077,3 @@ class OpenAICompletion(BaseLLM): if not self.supports_multimodal(): return [] return ["image/"] - - def format_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Format files as OpenAI multimodal content blocks. - - OpenAI supports both base64 data URLs and file_id references via Files API. - Uses FileResolver to determine the best delivery method based on file size. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - - Returns: - List of content blocks in OpenAI's expected format. - """ - if not self.supports_multimodal(): - return [] - - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - content_blocks: list[dict[str, Any]] = [] - supported_types = self.supported_multimodal_content_types() - - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - - for file_input in files.values(): - content_type = file_input.content_type - if not any(content_type.startswith(t) for t in supported_types): - continue - - resolved = resolver.resolve(file_input, "openai") - - if isinstance(resolved, FileReference): - content_blocks.append( - { - "type": "file", - "file": { - "file_id": resolved.file_id, - }, - } - ) - elif isinstance(resolved, InlineBase64): - content_blocks.append( - { - "type": "image_url", - "image_url": { - "url": f"data:{resolved.content_type};base64,{resolved.data}" - }, - } - ) - else: - data = base64.b64encode(file_input.read()).decode("ascii") - content_blocks.append( - { - "type": "image_url", - "image_url": {"url": f"data:{content_type};base64,{data}"}, - } - ) - - return content_blocks - - async def aformat_multimodal_content( - self, - files: dict[str, FileInput], - upload_cache: UploadCache | None = None, - ) -> list[dict[str, Any]]: - """Async format files as OpenAI multimodal content blocks. - - Uses parallel file resolution for improved performance with multiple files. - - Args: - files: Dictionary mapping file names to FileInput objects. - upload_cache: Optional cache for tracking uploaded files. - - Returns: - List of content blocks in OpenAI's expected format. - """ - if not self.supports_multimodal(): - return [] - - from crewai_files import ( - FileReference, - FileResolver, - FileResolverConfig, - InlineBase64, - ) - - supported_types = self.supported_multimodal_content_types() - - supported_files = { - name: f - for name, f in files.items() - if any(f.content_type.startswith(t) for t in supported_types) - } - - if not supported_files: - return [] - - config = FileResolverConfig(prefer_upload=False) - resolver = FileResolver(config=config, upload_cache=upload_cache) - resolved_files = await resolver.aresolve_files(supported_files, "openai") - - content_blocks: list[dict[str, Any]] = [] - for resolved in resolved_files.values(): - if isinstance(resolved, FileReference): - content_blocks.append( - { - "type": "file", - "file": { - "file_id": resolved.file_id, - }, - } - ) - elif isinstance(resolved, InlineBase64): - content_blocks.append( - { - "type": "image_url", - "image_url": { - "url": f"data:{resolved.content_type};base64,{resolved.data}" - }, - } - ) - - return content_blocks diff --git a/uv.lock b/uv.lock index 3d0335669..9359ccff0 100644 --- a/uv.lock +++ b/uv.lock @@ -1345,7 +1345,6 @@ requires-dist = [ [[package]] name = "crewai-files" -version = "0.1.0" source = { editable = "lib/crewai-files" } dependencies = [ { name = "aiocache" },