diff --git a/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py b/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py index 97b974c6b..e74ee23c7 100644 --- a/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py +++ b/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py @@ -64,7 +64,6 @@ class ReadFileTool(BaseTool): content_type = file_input.content_type filename = file_input.filename or file_name - # Text-based content types text_types = ( "text/", "application/json", @@ -75,6 +74,5 @@ class ReadFileTool(BaseTool): if any(content_type.startswith(t) for t in text_types): return content.decode("utf-8") - # Binary content - return base64 encoded encoded = base64.b64encode(content).decode("ascii") return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}" diff --git a/lib/crewai/src/crewai/utilities/file_store.py b/lib/crewai/src/crewai/utilities/file_store.py index fa0c11aa7..814ee486c 100644 --- a/lib/crewai/src/crewai/utilities/file_store.py +++ b/lib/crewai/src/crewai/utilities/file_store.py @@ -34,18 +34,15 @@ def _run_sync(coro: Coroutine[None, None, T]) -> T: """ try: asyncio.get_running_loop() - # We're in an async context - run in a thread pool with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(asyncio.run, coro) return future.result() except RuntimeError: - # No running loop - safe to create one return asyncio.run(coro) DEFAULT_TTL = 3600 -# Key prefixes for different scopes _CREW_PREFIX = "crew:" _TASK_PREFIX = "task:" @@ -149,7 +146,6 @@ async def aget_all_files( if not crew_files and not task_files: return None - # Merge with task files taking precedence return {**crew_files, **(task_files or {})} diff --git a/lib/crewai/src/crewai/utilities/files/__init__.py b/lib/crewai/src/crewai/utilities/files/__init__.py index fcd235e15..6d08e2d73 100644 --- a/lib/crewai/src/crewai/utilities/files/__init__.py +++ b/lib/crewai/src/crewai/utilities/files/__init__.py @@ -96,7 +96,6 @@ def wrap_file_source(source: FileSource) -> FileInput: return VideoFile(source=source) if content_type == "application/pdf": return PDFFile(source=source) - # Default to text for anything else return TextFile(source=source) @@ -116,10 +115,8 @@ def normalize_input_files( result: dict[str, FileInput] = {} for i, item in enumerate(input_files): - # If it's already a typed File wrapper, use it directly if isinstance(item, BaseFile): name = item.filename or f"file_{i}" - # Remove extension from name for cleaner keys if "." in name: name = name.rsplit(".", 1)[0] result[name] = item diff --git a/lib/crewai/src/crewai/utilities/files/processing/constraints.py b/lib/crewai/src/crewai/utilities/files/processing/constraints.py index 4fb2d284c..5b37762d6 100644 --- a/lib/crewai/src/crewai/utilities/files/processing/constraints.py +++ b/lib/crewai/src/crewai/utilities/files/processing/constraints.py @@ -108,45 +108,38 @@ class ProviderConstraints: file_upload_threshold_bytes: int | None = None -# Anthropic constraints (Claude 3+) -# https://docs.anthropic.com/en/docs/build-with-claude/vision ANTHROPIC_CONSTRAINTS = ProviderConstraints( name="anthropic", image=ImageConstraints( - max_size_bytes=5 * 1024 * 1024, # 5MB + max_size_bytes=5 * 1024 * 1024, max_width=8000, max_height=8000, supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"), ), pdf=PDFConstraints( - max_size_bytes=30 * 1024 * 1024, # 30MB + max_size_bytes=30 * 1024 * 1024, max_pages=100, ), supports_file_upload=True, - file_upload_threshold_bytes=5 * 1024 * 1024, # Use upload for files > 5MB + file_upload_threshold_bytes=5 * 1024 * 1024, ) -# OpenAI constraints (GPT-4o, GPT-4 Vision) -# https://platform.openai.com/docs/guides/vision OPENAI_CONSTRAINTS = ProviderConstraints( name="openai", image=ImageConstraints( - max_size_bytes=20 * 1024 * 1024, # 20MB + max_size_bytes=20 * 1024 * 1024, max_images_per_request=10, supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"), ), - # OpenAI does not support PDFs natively pdf=None, supports_file_upload=True, - file_upload_threshold_bytes=5 * 1024 * 1024, # Use upload for files > 5MB + file_upload_threshold_bytes=5 * 1024 * 1024, ) -# Gemini constraints -# https://ai.google.dev/gemini-api/docs/vision GEMINI_CONSTRAINTS = ProviderConstraints( name="gemini", image=ImageConstraints( - max_size_bytes=100 * 1024 * 1024, # 100MB inline + max_size_bytes=100 * 1024 * 1024, supported_formats=( "image/png", "image/jpeg", @@ -157,10 +150,10 @@ GEMINI_CONSTRAINTS = ProviderConstraints( ), ), pdf=PDFConstraints( - max_size_bytes=50 * 1024 * 1024, # 50MB inline + max_size_bytes=50 * 1024 * 1024, ), audio=AudioConstraints( - max_size_bytes=100 * 1024 * 1024, # 100MB + max_size_bytes=100 * 1024 * 1024, supported_formats=( "audio/mp3", "audio/mpeg", @@ -173,7 +166,7 @@ GEMINI_CONSTRAINTS = ProviderConstraints( ), ), video=VideoConstraints( - max_size_bytes=2 * 1024 * 1024 * 1024, # 2GB via File API + max_size_bytes=2 * 1024 * 1024 * 1024, supported_formats=( "video/mp4", "video/mpeg", @@ -184,30 +177,27 @@ GEMINI_CONSTRAINTS = ProviderConstraints( ), ), supports_file_upload=True, - file_upload_threshold_bytes=20 * 1024 * 1024, # Use upload for files > 20MB + file_upload_threshold_bytes=20 * 1024 * 1024, ) -# AWS Bedrock constraints (Claude via Bedrock) -# https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html BEDROCK_CONSTRAINTS = ProviderConstraints( name="bedrock", image=ImageConstraints( - max_size_bytes=4_608_000, # ~4.5MB (encoded size limit) + max_size_bytes=4_608_000, max_width=8000, max_height=8000, supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"), ), pdf=PDFConstraints( - max_size_bytes=3_840_000, # ~3.75MB + max_size_bytes=3_840_000, max_pages=100, ), ) -# Azure OpenAI constraints (same as OpenAI) AZURE_CONSTRAINTS = ProviderConstraints( name="azure", image=ImageConstraints( - max_size_bytes=20 * 1024 * 1024, # 20MB + max_size_bytes=20 * 1024 * 1024, max_images_per_request=10, supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"), ), @@ -215,14 +205,12 @@ AZURE_CONSTRAINTS = ProviderConstraints( ) -# Provider name mapping for convenience _PROVIDER_CONSTRAINTS_MAP: dict[str, ProviderConstraints] = { "anthropic": ANTHROPIC_CONSTRAINTS, "openai": OPENAI_CONSTRAINTS, "gemini": GEMINI_CONSTRAINTS, "bedrock": BEDROCK_CONSTRAINTS, "azure": AZURE_CONSTRAINTS, - # Aliases "claude": ANTHROPIC_CONSTRAINTS, "gpt": OPENAI_CONSTRAINTS, "google": GEMINI_CONSTRAINTS, @@ -246,11 +234,9 @@ def get_constraints_for_provider( provider_lower = provider.lower() - # Direct lookup if provider_lower in _PROVIDER_CONSTRAINTS_MAP: return _PROVIDER_CONSTRAINTS_MAP[provider_lower] - # Check if provider name contains any known provider for key, constraints in _PROVIDER_CONSTRAINTS_MAP.items(): if key in provider_lower: return constraints diff --git a/lib/crewai/src/crewai/utilities/files/processing/processor.py b/lib/crewai/src/crewai/utilities/files/processing/processor.py index 4711772a3..b40087d96 100644 --- a/lib/crewai/src/crewai/utilities/files/processing/processor.py +++ b/lib/crewai/src/crewai/utilities/files/processing/processor.py @@ -125,15 +125,12 @@ class FileProcessor: mode = self._get_mode(file) try: - # First validate errors = self.validate(file) if not errors: return file - # Handle based on mode if mode == FileHandling.STRICT: - # Errors should have already raised in validate() raise FileValidationError("; ".join(errors), file_name=file.filename) if mode == FileHandling.WARN: @@ -178,7 +175,6 @@ class FileProcessor: if isinstance(processed, Sequence) and not isinstance( processed, (str, bytes) ): - # File was chunked - add each chunk with indexed name for i, chunk in enumerate(processed): chunk_name = f"{name}_chunk_{i}" result[chunk_name] = chunk @@ -203,15 +199,12 @@ class FileProcessor: return self._auto_process_image(file) if isinstance(file, PDFFile) and self.constraints.pdf is not None: - # PDFs can't easily be auto-compressed, log warning logger.warning( f"Cannot auto-compress PDF '{file.filename}'. " "Consider using CHUNK mode for large PDFs." ) return file - # Audio and video auto-processing would require additional dependencies - # For now, just warn if isinstance(file, (AudioFile, VideoFile)): logger.warning( f"Auto-processing not supported for {type(file).__name__}. " @@ -235,10 +228,9 @@ class FileProcessor: image_constraints = self.constraints.image processed = file - content = file.source.read() + content = file.read() current_size = len(content) - # First, resize if dimensions exceed limits if image_constraints.max_width or image_constraints.max_height: dimensions = get_image_dimensions(file) if dimensions: @@ -249,12 +241,11 @@ class FileProcessor: if width > max_w or height > max_h: try: processed = resize_image(file, max_w, max_h) - content = processed.source.read() + content = processed.read() current_size = len(content) except Exception as e: logger.warning(f"Failed to resize image: {e}") - # Then, optimize if size still exceeds limits if current_size > image_constraints.max_size_bytes: try: processed = optimize_image(processed, image_constraints.max_size_bytes) @@ -290,7 +281,7 @@ class FileProcessor: # Use general max size as character limit approximation max_size = self.constraints.general_max_size_bytes if max_size is not None: - content = file.source.read() + content = file.read() if len(content) > max_size: try: return chunk_text(file, max_size) @@ -298,7 +289,6 @@ class FileProcessor: logger.warning(f"Failed to chunk text file: {e}") return file - # For other file types, chunking is not supported if isinstance(file, (ImageFile, AudioFile, VideoFile)): logger.warning( f"Chunking not supported for {type(file).__name__}. " diff --git a/lib/crewai/src/crewai/utilities/files/processing/transformers.py b/lib/crewai/src/crewai/utilities/files/processing/transformers.py index 26de42185..2e4288bb7 100644 --- a/lib/crewai/src/crewai/utilities/files/processing/transformers.py +++ b/lib/crewai/src/crewai/utilities/files/processing/transformers.py @@ -42,17 +42,15 @@ def resize_image( install_command="pip install Pillow", ) from e - content = file.source.read() + content = file.read() with Image.open(io.BytesIO(content)) as img: original_width, original_height = img.size - # Check if resize is needed if original_width <= max_width and original_height <= max_height: return file if preserve_aspect_ratio: - # Calculate scaling factor to fit within bounds width_ratio = max_width / original_width height_ratio = max_height / original_height scale_factor = min(width_ratio, height_ratio) @@ -63,17 +61,13 @@ def resize_image( new_width = min(original_width, max_width) new_height = min(original_height, max_height) - # Resize the image resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS) - # Determine output format output_format = img.format or "PNG" if output_format.upper() == "JPEG": - # Handle RGBA images for JPEG if resized_img.mode in ("RGBA", "LA", "P"): resized_img = resized_img.convert("RGB") - # Save to bytes output_buffer = io.BytesIO() resized_img.save(output_buffer, format=output_format) output_bytes = output_buffer.getvalue() @@ -118,15 +112,13 @@ def optimize_image( install_command="pip install Pillow", ) from e - content = file.source.read() + content = file.read() current_size = len(content) - # If already within target, return as-is if current_size <= target_size_bytes: return file with Image.open(io.BytesIO(content)) as img: - # Convert to RGB for JPEG compression if needed if img.mode in ("RGBA", "LA", "P"): img = img.convert("RGB") output_format = "JPEG" @@ -138,7 +130,6 @@ def optimize_image( quality = initial_quality output_bytes = content - # Binary search for optimal quality while len(output_bytes) > target_size_bytes and quality >= min_quality: output_buffer = io.BytesIO() img.save( @@ -193,11 +184,10 @@ def chunk_pdf( install_command="pip install pypdf", ) from e - content = file.source.read() + content = file.read() reader = PdfReader(io.BytesIO(content)) total_pages = len(reader.pages) - # If within limit, return as-is if total_pages <= max_pages: return [file] @@ -253,11 +243,10 @@ def chunk_text( Returns: List of TextFile objects, one per chunk. """ - content = file.source.read() + content = file.read() text = content.decode("utf-8", errors="replace") total_chars = len(text) - # If within limit, return as-is if total_chars <= max_chars: return [file] @@ -291,7 +280,6 @@ def chunk_text( f"Created text chunk '{chunk_filename}' with {len(chunk_text)} characters" ) - # Move start position with overlap start_pos = end_pos - overlap_chars if end_pos < total_chars else total_chars chunk_num += 1 @@ -313,7 +301,7 @@ def get_image_dimensions(file: ImageFile) -> tuple[int, int] | None: logger.warning("Pillow not installed - cannot get image dimensions") return None - content = file.source.read() + content = file.read() try: with Image.open(io.BytesIO(content)) as img: @@ -339,7 +327,7 @@ def get_pdf_page_count(file: PDFFile) -> int | None: logger.warning("pypdf not installed - cannot get PDF page count") return None - content = file.source.read() + content = file.read() try: reader = PdfReader(io.BytesIO(content)) diff --git a/lib/crewai/src/crewai/utilities/files/processing/validators.py b/lib/crewai/src/crewai/utilities/files/processing/validators.py index 6f576d6d3..814df513d 100644 --- a/lib/crewai/src/crewai/utilities/files/processing/validators.py +++ b/lib/crewai/src/crewai/utilities/files/processing/validators.py @@ -63,11 +63,10 @@ def validate_image( UnsupportedFileTypeError: If the format is not supported. """ errors: list[str] = [] - content = file.source.read() + content = file.read() file_size = len(content) filename = file.filename - # Check file size if file_size > constraints.max_size_bytes: msg = ( f"Image '{filename}' size ({_format_size(file_size)}) exceeds " @@ -82,7 +81,6 @@ def validate_image( max_size=constraints.max_size_bytes, ) - # Check format content_type = file.content_type if content_type not in constraints.supported_formats: msg = ( @@ -95,7 +93,6 @@ def validate_image( msg, file_name=filename, content_type=content_type ) - # Check dimensions if constraints specify them if constraints.max_width is not None or constraints.max_height is not None: try: import io @@ -153,11 +150,10 @@ def validate_pdf( FileValidationError: If the file exceeds page limits. """ errors: list[str] = [] - content = file.source.read() + content = file.read() file_size = len(content) filename = file.filename - # Check file size if file_size > constraints.max_size_bytes: msg = ( f"PDF '{filename}' size ({_format_size(file_size)}) exceeds " @@ -172,7 +168,6 @@ def validate_pdf( max_size=constraints.max_size_bytes, ) - # Check page count if constraint specifies it if constraints.max_pages is not None: try: import io @@ -221,11 +216,10 @@ def validate_audio( UnsupportedFileTypeError: If the format is not supported. """ errors: list[str] = [] - content = file.source.read() + content = file.read() file_size = len(content) filename = file.filename - # Check file size if file_size > constraints.max_size_bytes: msg = ( f"Audio '{filename}' size ({_format_size(file_size)}) exceeds " @@ -240,7 +234,6 @@ def validate_audio( max_size=constraints.max_size_bytes, ) - # Check format content_type = file.content_type if content_type not in constraints.supported_formats: msg = ( @@ -277,11 +270,10 @@ def validate_video( UnsupportedFileTypeError: If the format is not supported. """ errors: list[str] = [] - content = file.source.read() + content = file.read() file_size = len(content) filename = file.filename - # Check file size if file_size > constraints.max_size_bytes: msg = ( f"Video '{filename}' size ({_format_size(file_size)}) exceeds " @@ -296,7 +288,6 @@ def validate_video( max_size=constraints.max_size_bytes, ) - # Check format content_type = file.content_type if content_type not in constraints.supported_formats: msg = ( @@ -336,7 +327,7 @@ def validate_text( if constraints.general_max_size_bytes is None: return errors - content = file.source.read() + content = file.read() file_size = len(content) filename = file.filename @@ -423,5 +414,4 @@ def validate_file( if isinstance(file, TextFile): return validate_text(file, constraints, raise_on_error=raise_on_error) - # Unknown file type - can't validate return [] diff --git a/lib/crewai/src/crewai/utilities/files/resolver.py b/lib/crewai/src/crewai/utilities/files/resolver.py index 0459a1b8e..b8ee9460d 100644 --- a/lib/crewai/src/crewai/utilities/files/resolver.py +++ b/lib/crewai/src/crewai/utilities/files/resolver.py @@ -80,9 +80,8 @@ class FileResolver: """ provider_lower = provider.lower() constraints = get_constraints_for_provider(provider) - file_size = len(file.source.read()) + file_size = len(file.read()) - # Determine if we should use file upload should_upload = self._should_upload( file, provider_lower, constraints, file_size ) @@ -91,9 +90,7 @@ class FileResolver: resolved = self._resolve_via_upload(file, provider_lower) if resolved is not None: return resolved - # Fall back to inline if upload fails - # Use inline format return self._resolve_inline(file, provider_lower) def resolve_files( @@ -130,15 +127,12 @@ class FileResolver: Returns: True if the file should be uploaded, False otherwise. """ - # Check if provider supports file upload if constraints is None or not constraints.supports_file_upload: return False - # If prefer_upload is set, always prefer upload if self.config.prefer_upload: return True - # Check against size threshold threshold = self.config.upload_threshold_bytes if threshold is None and constraints is not None: threshold = constraints.file_upload_threshold_bytes @@ -162,7 +156,6 @@ class FileResolver: Returns: FileReference if upload succeeds, None otherwise. """ - # Check cache first if self.upload_cache is not None: cached = self.upload_cache.get(file, provider) if cached is not None: @@ -177,7 +170,6 @@ class FileResolver: file_uri=cached.file_uri, ) - # Get or create uploader uploader = self._get_uploader(provider) if uploader is None: logger.debug(f"No uploader available for {provider}") @@ -186,7 +178,6 @@ class FileResolver: try: result = uploader.upload(file) - # Cache the result if self.upload_cache is not None: self.upload_cache.set( file=file, @@ -218,16 +209,14 @@ class FileResolver: Returns: InlineBase64 or InlineBytes depending on provider. """ - content = file.source.read() + content = file.read() - # Use raw bytes for Bedrock if configured if self.config.use_bytes_for_bedrock and "bedrock" in provider: return InlineBytes( content_type=file.content_type, data=content, ) - # Default to base64 encoded = base64.b64encode(content).decode("ascii") return InlineBase64( content_type=file.content_type, diff --git a/lib/crewai/src/crewai/utilities/files/upload_cache.py b/lib/crewai/src/crewai/utilities/files/upload_cache.py index ea83d2bf4..2a76542e7 100644 --- a/lib/crewai/src/crewai/utilities/files/upload_cache.py +++ b/lib/crewai/src/crewai/utilities/files/upload_cache.py @@ -126,10 +126,6 @@ class UploadCache: if provider in self._provider_keys: self._provider_keys[provider].discard(key) - # ------------------------------------------------------------------------- - # Async methods (primary interface) - # ------------------------------------------------------------------------- - async def aget(self, file: FileInput, provider: str) -> CachedUpload | None: """Get a cached upload for a file. @@ -334,10 +330,6 @@ class UploadCache: results.append(cached) return results - # ------------------------------------------------------------------------- - # Sync wrappers (convenience) - # ------------------------------------------------------------------------- - def _run_sync(self, coro: Any) -> Any: """Run an async coroutine from sync context.""" try: @@ -436,7 +428,6 @@ class UploadCache: return builtins.set(self._provider_keys.keys()) -# Module-level cache instance _default_cache: UploadCache | None = None @@ -482,7 +473,6 @@ def _cleanup_on_exit() -> None: if _default_cache is None or len(_default_cache) == 0: return - # Import here to avoid circular imports from crewai.utilities.files.cleanup import cleanup_uploaded_files try: diff --git a/lib/crewai/src/crewai/utilities/files/uploaders/anthropic.py b/lib/crewai/src/crewai/utilities/files/uploaders/anthropic.py index 69153863b..c7bf64010 100644 --- a/lib/crewai/src/crewai/utilities/files/uploaders/anthropic.py +++ b/lib/crewai/src/crewai/utilities/files/uploaders/anthropic.py @@ -77,18 +77,15 @@ class AnthropicFileUploader(FileUploader): """ client = self._get_client() - content = file.source.read() + content = file.read() file_purpose = purpose or "user_upload" - # Create a file-like object for upload file_data = io.BytesIO(content) logger.info( f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)" ) - # Upload using the anthropic client - # Note: The Anthropic Files API uses a tuple format: (filename, file_obj, content_type) uploaded_file = client.files.create( file=(file.filename, file_data, file.content_type), purpose=file_purpose, @@ -98,9 +95,9 @@ class AnthropicFileUploader(FileUploader): return UploadResult( file_id=uploaded_file.id, - file_uri=None, # Anthropic doesn't provide a URI + file_uri=None, content_type=file.content_type, - expires_at=None, # Anthropic files don't auto-expire + expires_at=None, provider=self.provider_name, ) diff --git a/lib/crewai/src/crewai/utilities/files/uploaders/gemini.py b/lib/crewai/src/crewai/utilities/files/uploaders/gemini.py index 7c5bce8c5..c4a53db38 100644 --- a/lib/crewai/src/crewai/utilities/files/uploaders/gemini.py +++ b/lib/crewai/src/crewai/utilities/files/uploaders/gemini.py @@ -23,7 +23,6 @@ logger = logging.getLogger(__name__) FileInput = AudioFile | File | ImageFile | PDFFile | TextFile | VideoFile -# Gemini files expire after 48 hours GEMINI_FILE_TTL = timedelta(hours=48) @@ -80,10 +79,9 @@ class GeminiFileUploader(FileUploader): """ client = self._get_client() - content = file.source.read() + content = file.read() display_name = purpose or file.filename - # Create a file-like object for upload file_data = io.BytesIO(content) file_data.name = file.filename @@ -91,7 +89,6 @@ class GeminiFileUploader(FileUploader): f"Uploading file '{file.filename}' to Gemini ({len(content)} bytes)" ) - # Upload using the genai client uploaded_file = client.files.upload( file=file_data, config={ @@ -199,7 +196,6 @@ class GeminiFileUploader(FileUploader): try: from google.genai.types import FileState except ImportError: - # If we can't import FileState, just return True return True client = self._get_client() diff --git a/lib/crewai/src/crewai/utilities/files/uploaders/openai.py b/lib/crewai/src/crewai/utilities/files/uploaders/openai.py index ecfa257b7..f94905316 100644 --- a/lib/crewai/src/crewai/utilities/files/uploaders/openai.py +++ b/lib/crewai/src/crewai/utilities/files/uploaders/openai.py @@ -77,7 +77,7 @@ class OpenAIFileUploader(FileUploader): """ client = self._get_client() - content = file.source.read() + content = file.read() file_purpose = purpose or "user_data" file_data = io.BytesIO(content) @@ -96,9 +96,9 @@ class OpenAIFileUploader(FileUploader): return UploadResult( file_id=uploaded_file.id, - file_uri=None, # OpenAI doesn't provide a URI + file_uri=None, content_type=file.content_type, - expires_at=None, # OpenAI files don't auto-expire + expires_at=None, provider=self.provider_name, )