diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ed85cee73..defe87b5c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: language: system pass_filenames: true types: [python] - exclude: ^(lib/crewai/src/crewai/cli/templates/|lib/crewai/tests/|lib/crewai-tools/tests/) + exclude: ^(lib/crewai/src/crewai/cli/templates/|lib/crewai/tests/|lib/crewai-tools/tests/|lib/crewai-files/tests/) - repo: https://github.com/astral-sh/uv-pre-commit rev: 0.9.3 hooks: diff --git a/conftest.py b/conftest.py index d63e7c885..5d1be7a24 100644 --- a/conftest.py +++ b/conftest.py @@ -160,7 +160,10 @@ def vcr_cassette_dir(request: Any) -> str: test_file = Path(request.fspath) for parent in test_file.parents: - if parent.name in ("crewai", "crewai-tools") and parent.parent.name == "lib": + if ( + parent.name in ("crewai", "crewai-tools", "crewai-files") + and parent.parent.name == "lib" + ): package_root = parent break else: diff --git a/lib/crewai-files/README.md b/lib/crewai-files/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/lib/crewai-files/pyproject.toml b/lib/crewai-files/pyproject.toml new file mode 100644 index 000000000..9b3e6a8a3 --- /dev/null +++ b/lib/crewai-files/pyproject.toml @@ -0,0 +1,25 @@ +[project] +name = "crewai-files" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +authors = [ + { name = "Greyson LaLonde", email = "greyson.r.lalonde@gmail.com" } +] +requires-python = ">=3.10, <3.14" +dependencies = [ + "Pillow~=10.4.0", + "pypdf~=4.0.0", + "python-magic>=0.4.27", + "aiocache~=0.12.3", + "aiofiles~=24.1.0", + "tinytag~=1.10.0", + "av~=13.0.0", +] + +[project.scripts] +crewai-files = "crewai_files:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/lib/crewai/src/crewai/files/__init__.py b/lib/crewai-files/src/crewai_files/__init__.py similarity index 86% rename from lib/crewai/src/crewai/files/__init__.py rename to lib/crewai-files/src/crewai_files/__init__.py index 1c7360227..4c874fbe0 100644 --- a/lib/crewai/src/crewai/files/__init__.py +++ b/lib/crewai-files/src/crewai_files/__init__.py @@ -1,11 +1,34 @@ """File handling utilities for crewAI tasks.""" -from crewai.files.cleanup import ( +from crewai_files.cache.cleanup import ( cleanup_expired_files, cleanup_provider_files, cleanup_uploaded_files, ) -from crewai.files.content_types import ( +from crewai_files.cache.upload_cache import ( + CachedUpload, + UploadCache, + get_upload_cache, + reset_upload_cache, +) +from crewai_files.core.resolved import ( + FileReference, + InlineBase64, + InlineBytes, + ResolvedFile, + ResolvedFileType, + UrlReference, +) +from crewai_files.core.sources import ( + FileBytes, + FilePath, + FileSource, + FileSourceInput, + FileStream, + FileUrl, + RawFileInput, +) +from crewai_files.core.types import ( AudioExtension, AudioFile, AudioMimeType, @@ -26,16 +49,7 @@ from crewai.files.content_types import ( VideoFile, VideoMimeType, ) -from crewai.files.file import ( - FileBytes, - FilePath, - FileSource, - FileSourceInput, - FileStream, - FileUrl, - RawFileInput, -) -from crewai.files.processing import ( +from crewai_files.processing import ( ANTHROPIC_CONSTRAINTS, BEDROCK_CONSTRAINTS, GEMINI_CONSTRAINTS, @@ -54,27 +68,13 @@ from crewai.files.processing import ( VideoConstraints, get_constraints_for_provider, ) -from crewai.files.resolved import ( - FileReference, - InlineBase64, - InlineBytes, - ResolvedFile, - ResolvedFileType, - UrlReference, -) -from crewai.files.resolver import ( +from crewai_files.resolution.resolver import ( FileResolver, FileResolverConfig, create_resolver, ) -from crewai.files.upload_cache import ( - CachedUpload, - UploadCache, - get_upload_cache, - reset_upload_cache, -) -from crewai.files.uploaders import FileUploader, UploadResult, get_uploader -from crewai.files.utils import normalize_input_files, wrap_file_source +from crewai_files.resolution.utils import normalize_input_files, wrap_file_source +from crewai_files.uploaders import FileUploader, UploadResult, get_uploader __all__ = [ diff --git a/lib/crewai-files/src/crewai_files/cache/__init__.py b/lib/crewai-files/src/crewai_files/cache/__init__.py new file mode 100644 index 000000000..74a4e2b6a --- /dev/null +++ b/lib/crewai-files/src/crewai_files/cache/__init__.py @@ -0,0 +1,14 @@ +"""Upload caching and cleanup.""" + +from crewai_files.cache.cleanup import cleanup_uploaded_files +from crewai_files.cache.metrics import FileOperationMetrics, measure_operation +from crewai_files.cache.upload_cache import UploadCache, get_upload_cache + + +__all__ = [ + "FileOperationMetrics", + "UploadCache", + "cleanup_uploaded_files", + "get_upload_cache", + "measure_operation", +] diff --git a/lib/crewai/src/crewai/files/cleanup.py b/lib/crewai-files/src/crewai_files/cache/cleanup.py similarity index 95% rename from lib/crewai/src/crewai/files/cleanup.py rename to lib/crewai-files/src/crewai_files/cache/cleanup.py index b58f66b6c..41e71bf05 100644 --- a/lib/crewai/src/crewai/files/cleanup.py +++ b/lib/crewai-files/src/crewai_files/cache/cleanup.py @@ -6,12 +6,13 @@ import asyncio import logging from typing import TYPE_CHECKING -from crewai.files.upload_cache import CachedUpload, UploadCache -from crewai.files.uploaders import get_uploader +from crewai_files.cache.upload_cache import CachedUpload, UploadCache +from crewai_files.uploaders import get_uploader +from crewai_files.uploaders.factory import ProviderType if TYPE_CHECKING: - from crewai.files.uploaders.base import FileUploader + from crewai_files.uploaders.base import FileUploader logger = logging.getLogger(__name__) @@ -46,7 +47,7 @@ def cleanup_uploaded_files( cache: UploadCache, *, delete_from_provider: bool = True, - providers: list[str] | None = None, + providers: list[ProviderType] | None = None, ) -> int: """Clean up uploaded files from the cache and optionally from providers. @@ -60,7 +61,7 @@ def cleanup_uploaded_files( """ cleaned = 0 - provider_uploads: dict[str, list[CachedUpload]] = {} + provider_uploads: dict[ProviderType, list[CachedUpload]] = {} for provider in _get_providers_from_cache(cache): if providers is not None and provider not in providers: @@ -126,7 +127,7 @@ def cleanup_expired_files( def cleanup_provider_files( - provider: str, + provider: ProviderType, *, cache: UploadCache | None = None, delete_all_from_provider: bool = False, @@ -169,7 +170,7 @@ def cleanup_provider_files( return deleted -def _get_providers_from_cache(cache: UploadCache) -> set[str]: +def _get_providers_from_cache(cache: UploadCache) -> set[ProviderType]: """Get unique provider names from cache entries. Args: @@ -211,7 +212,7 @@ async def acleanup_uploaded_files( cache: UploadCache, *, delete_from_provider: bool = True, - providers: list[str] | None = None, + providers: list[ProviderType] | None = None, max_concurrency: int = 10, ) -> int: """Async clean up uploaded files from the cache and optionally from providers. @@ -227,7 +228,7 @@ async def acleanup_uploaded_files( """ cleaned = 0 - provider_uploads: dict[str, list[CachedUpload]] = {} + provider_uploads: dict[ProviderType, list[CachedUpload]] = {} for provider in _get_providers_from_cache(cache): if providers is not None and provider not in providers: @@ -315,7 +316,7 @@ async def acleanup_expired_files( async def acleanup_provider_files( - provider: str, + provider: ProviderType, *, cache: UploadCache | None = None, delete_all_from_provider: bool = False, diff --git a/lib/crewai/src/crewai/files/metrics.py b/lib/crewai-files/src/crewai_files/cache/metrics.py similarity index 100% rename from lib/crewai/src/crewai/files/metrics.py rename to lib/crewai-files/src/crewai_files/cache/metrics.py diff --git a/lib/crewai/src/crewai/files/upload_cache.py b/lib/crewai-files/src/crewai_files/cache/upload_cache.py similarity index 90% rename from lib/crewai/src/crewai/files/upload_cache.py rename to lib/crewai-files/src/crewai_files/cache/upload_cache.py index 8fab127da..48cebdfa1 100644 --- a/lib/crewai/src/crewai/files/upload_cache.py +++ b/lib/crewai-files/src/crewai_files/cache/upload_cache.py @@ -15,11 +15,12 @@ from typing import TYPE_CHECKING, Any from aiocache import Cache # type: ignore[import-untyped] from aiocache.serializers import PickleSerializer # type: ignore[import-untyped] -from crewai.files.constants import DEFAULT_MAX_CACHE_ENTRIES, DEFAULT_TTL_SECONDS +from crewai_files.core.constants import DEFAULT_MAX_CACHE_ENTRIES, DEFAULT_TTL_SECONDS +from crewai_files.uploaders.factory import ProviderType if TYPE_CHECKING: - from crewai.files.content_types import FileInput + from crewai_files.core.types import FileInput logger = logging.getLogger(__name__) @@ -38,7 +39,7 @@ class CachedUpload: """ file_id: str - provider: str + provider: ProviderType file_uri: str | None content_type: str uploaded_at: datetime @@ -76,7 +77,7 @@ def _compute_file_hash(file: FileInput) -> str: Uses streaming for FilePath sources to avoid loading large files into memory. """ - from crewai.files.file import FilePath + from crewai_files.core.sources import FilePath source = file._file_source if isinstance(source, FilePath): @@ -116,7 +117,7 @@ class UploadCache: self.ttl = ttl self.namespace = namespace self.max_entries = max_entries - self._provider_keys: dict[str, set[str]] = {} + self._provider_keys: dict[ProviderType, set[str]] = {} self._key_access_order: list[str] = [] if cache_type == "redis": @@ -132,7 +133,7 @@ class UploadCache: namespace=namespace, ) - def _track_key(self, provider: str, key: str) -> None: + def _track_key(self, provider: ProviderType, key: str) -> None: """Track a key for a provider (for cleanup) and access order.""" if provider not in self._provider_keys: self._provider_keys[provider] = set() @@ -141,7 +142,7 @@ class UploadCache: self._key_access_order.remove(key) self._key_access_order.append(key) - def _untrack_key(self, provider: str, key: str) -> None: + def _untrack_key(self, provider: ProviderType, key: str) -> None: """Remove key tracking for a provider.""" if provider in self._provider_keys: self._provider_keys[provider].discard(key) @@ -188,7 +189,9 @@ class UploadCache: return evicted - async def aget(self, file: FileInput, provider: str) -> CachedUpload | None: + async def aget( + self, file: FileInput, provider: ProviderType + ) -> CachedUpload | None: """Get a cached upload for a file. Args: @@ -201,7 +204,9 @@ class UploadCache: file_hash = _compute_file_hash(file) return await self.aget_by_hash(file_hash, provider) - async def aget_by_hash(self, file_hash: str, provider: str) -> CachedUpload | None: + async def aget_by_hash( + self, file_hash: str, provider: ProviderType + ) -> CachedUpload | None: """Get a cached upload by file hash. Args: @@ -227,7 +232,7 @@ class UploadCache: async def aset( self, file: FileInput, - provider: str, + provider: ProviderType, file_id: str, file_uri: str | None = None, expires_at: datetime | None = None, @@ -258,7 +263,7 @@ class UploadCache: self, file_hash: str, content_type: str, - provider: str, + provider: ProviderType, file_id: str, file_uri: str | None = None, expires_at: datetime | None = None, @@ -299,7 +304,7 @@ class UploadCache: logger.debug(f"Cached upload: {file_id} for provider {provider}") return cached - async def aremove(self, file: FileInput, provider: str) -> bool: + async def aremove(self, file: FileInput, provider: ProviderType) -> bool: """Remove a cached upload. Args: @@ -318,7 +323,7 @@ class UploadCache: self._untrack_key(provider, key) return removed - async def aremove_by_file_id(self, file_id: str, provider: str) -> bool: + async def aremove_by_file_id(self, file_id: str, provider: ProviderType) -> bool: """Remove a cached upload by file ID. Args: @@ -375,7 +380,7 @@ class UploadCache: logger.debug(f"Cleared {count} cache entries") return count - async def aget_all_for_provider(self, provider: str) -> list[CachedUpload]: + async def aget_all_for_provider(self, provider: ProviderType) -> list[CachedUpload]: """Get all cached uploads for a provider. Args: @@ -407,12 +412,14 @@ class UploadCache: return future.result(timeout=30) return asyncio.run(coro) - def get(self, file: FileInput, provider: str) -> CachedUpload | None: + def get(self, file: FileInput, provider: ProviderType) -> CachedUpload | None: """Sync wrapper for aget.""" result: CachedUpload | None = self._run_sync(self.aget(file, provider)) return result - def get_by_hash(self, file_hash: str, provider: str) -> CachedUpload | None: + def get_by_hash( + self, file_hash: str, provider: ProviderType + ) -> CachedUpload | None: """Sync wrapper for aget_by_hash.""" result: CachedUpload | None = self._run_sync( self.aget_by_hash(file_hash, provider) @@ -422,7 +429,7 @@ class UploadCache: def set( self, file: FileInput, - provider: str, + provider: ProviderType, file_id: str, file_uri: str | None = None, expires_at: datetime | None = None, @@ -437,7 +444,7 @@ class UploadCache: self, file_hash: str, content_type: str, - provider: str, + provider: ProviderType, file_id: str, file_uri: str | None = None, expires_at: datetime | None = None, @@ -450,12 +457,12 @@ class UploadCache: ) return result - def remove(self, file: FileInput, provider: str) -> bool: + def remove(self, file: FileInput, provider: ProviderType) -> bool: """Sync wrapper for aremove.""" result: bool = self._run_sync(self.aremove(file, provider)) return result - def remove_by_file_id(self, file_id: str, provider: str) -> bool: + def remove_by_file_id(self, file_id: str, provider: ProviderType) -> bool: """Sync wrapper for aremove_by_file_id.""" result: bool = self._run_sync(self.aremove_by_file_id(file_id, provider)) return result @@ -470,7 +477,7 @@ class UploadCache: result: int = self._run_sync(self.aclear()) return result - def get_all_for_provider(self, provider: str) -> list[CachedUpload]: + def get_all_for_provider(self, provider: ProviderType) -> list[CachedUpload]: """Sync wrapper for aget_all_for_provider.""" result: list[CachedUpload] = self._run_sync( self.aget_all_for_provider(provider) @@ -481,7 +488,7 @@ class UploadCache: """Return the number of cached entries.""" return sum(len(keys) for keys in self._provider_keys.values()) - def get_providers(self) -> builtins.set[str]: + def get_providers(self) -> builtins.set[ProviderType]: """Get all provider names that have cached entries. Returns: @@ -535,7 +542,7 @@ def _cleanup_on_exit() -> None: if _default_cache is None or len(_default_cache) == 0: return - from crewai.files.cleanup import cleanup_uploaded_files + from crewai_files.cache.cleanup import cleanup_uploaded_files try: cleanup_uploaded_files(_default_cache) diff --git a/lib/crewai-files/src/crewai_files/core/__init__.py b/lib/crewai-files/src/crewai_files/core/__init__.py new file mode 100644 index 000000000..ee057c8cb --- /dev/null +++ b/lib/crewai-files/src/crewai_files/core/__init__.py @@ -0,0 +1,92 @@ +"""Core file types and sources.""" + +from crewai_files.core.constants import ( + BACKOFF_BASE_DELAY, + BACKOFF_JITTER_FACTOR, + BACKOFF_MAX_DELAY, + DEFAULT_MAX_CACHE_ENTRIES, + DEFAULT_MAX_FILE_SIZE_BYTES, + DEFAULT_TTL_SECONDS, + DEFAULT_UPLOAD_CHUNK_SIZE, + FILES_API_MAX_SIZE, + GEMINI_FILE_TTL, + MAGIC_BUFFER_SIZE, + MAX_CONCURRENCY, + MULTIPART_CHUNKSIZE, + MULTIPART_THRESHOLD, + UPLOAD_MAX_RETRIES, + UPLOAD_RETRY_DELAY_BASE, +) +from crewai_files.core.resolved import ( + FileReference, + InlineBase64, + InlineBytes, + ResolvedFile, + UrlReference, +) +from crewai_files.core.sources import ( + AsyncFileStream, + FileBytes, + FilePath, + FileSource, + FileStream, + FileUrl, +) +from crewai_files.core.types import ( + AudioFile, + AudioMimeType, + BaseFile, + CoercedFileSource, + File, + FileInput, + FileMode, + ImageFile, + ImageMimeType, + PDFFile, + TextFile, + VideoFile, + VideoMimeType, +) + + +__all__ = [ + "BACKOFF_BASE_DELAY", + "BACKOFF_JITTER_FACTOR", + "BACKOFF_MAX_DELAY", + "DEFAULT_MAX_CACHE_ENTRIES", + "DEFAULT_MAX_FILE_SIZE_BYTES", + "DEFAULT_TTL_SECONDS", + "DEFAULT_UPLOAD_CHUNK_SIZE", + "FILES_API_MAX_SIZE", + "GEMINI_FILE_TTL", + "MAGIC_BUFFER_SIZE", + "MAX_CONCURRENCY", + "MULTIPART_CHUNKSIZE", + "MULTIPART_THRESHOLD", + "UPLOAD_MAX_RETRIES", + "UPLOAD_RETRY_DELAY_BASE", + "AsyncFileStream", + "AudioFile", + "AudioMimeType", + "BaseFile", + "CoercedFileSource", + "File", + "FileBytes", + "FileInput", + "FileMode", + "FilePath", + "FileReference", + "FileSource", + "FileStream", + "FileUrl", + "ImageFile", + "ImageMimeType", + "InlineBase64", + "InlineBytes", + "PDFFile", + "ResolvedFile", + "TextFile", + "UrlReference", + "VideoFile", + "VideoMimeType", +] diff --git a/lib/crewai/src/crewai/files/constants.py b/lib/crewai-files/src/crewai_files/core/constants.py similarity index 100% rename from lib/crewai/src/crewai/files/constants.py rename to lib/crewai-files/src/crewai_files/core/constants.py diff --git a/lib/crewai/src/crewai/files/resolved.py b/lib/crewai-files/src/crewai_files/core/resolved.py similarity index 100% rename from lib/crewai/src/crewai/files/resolved.py rename to lib/crewai-files/src/crewai_files/core/resolved.py diff --git a/lib/crewai/src/crewai/files/file.py b/lib/crewai-files/src/crewai_files/core/sources.py similarity index 97% rename from lib/crewai/src/crewai/files/file.py rename to lib/crewai-files/src/crewai_files/core/sources.py index 7ed2ab5d3..3aaccf70e 100644 --- a/lib/crewai/src/crewai/files/file.py +++ b/lib/crewai-files/src/crewai_files/core/sources.py @@ -17,8 +17,9 @@ from pydantic import ( model_validator, ) from pydantic_core import CoreSchema, core_schema +from typing_extensions import TypeIs -from crewai.files.constants import DEFAULT_MAX_FILE_SIZE_BYTES, MAGIC_BUFFER_SIZE +from crewai_files.core.constants import DEFAULT_MAX_FILE_SIZE_BYTES, MAGIC_BUFFER_SIZE @runtime_checkable @@ -146,7 +147,7 @@ class FilePath(BaseModel): @model_validator(mode="after") def _validate_file_exists(self) -> FilePath: """Validate that the file exists, is secure, and within size limits.""" - from crewai.files.processing.exceptions import FileTooLargeError + from crewai_files.processing.exceptions import FileTooLargeError path_str = str(self.path) if ".." in path_str: @@ -482,6 +483,11 @@ class FileUrl(BaseModel): FileSource = FilePath | FileBytes | FileStream | AsyncFileStream | FileUrl +def is_file_source(v: object) -> TypeIs[FileSource]: + """Type guard to narrow input to FileSource.""" + return isinstance(v, (FilePath, FileBytes, FileStream, FileUrl)) + + def _normalize_source(value: Any) -> FileSource: """Convert raw input to appropriate source type.""" if isinstance(value, (FilePath, FileBytes, FileStream, AsyncFileStream, FileUrl)): diff --git a/lib/crewai/src/crewai/files/content_types.py b/lib/crewai-files/src/crewai_files/core/types.py similarity index 98% rename from lib/crewai/src/crewai/files/content_types.py rename to lib/crewai-files/src/crewai_files/core/types.py index 7995a0557..e3681e6c3 100644 --- a/lib/crewai/src/crewai/files/content_types.py +++ b/lib/crewai-files/src/crewai_files/core/types.py @@ -10,15 +10,15 @@ from typing import Annotated, Any, BinaryIO, Literal, Self from pydantic import BaseModel, Field, GetCoreSchemaHandler from pydantic_core import CoreSchema, core_schema -from crewai.files.file import ( +from crewai_files.core.sources import ( AsyncFileStream, FileBytes, FilePath, FileSource, FileStream, FileUrl, + is_file_source, ) -from crewai.files.utils import is_file_source FileSourceInput = str | Path | bytes | IOBase | FileSource diff --git a/lib/crewai/src/crewai/files/processing/__init__.py b/lib/crewai-files/src/crewai_files/processing/__init__.py similarity index 82% rename from lib/crewai/src/crewai/files/processing/__init__.py rename to lib/crewai-files/src/crewai_files/processing/__init__.py index 21694f180..080310f1d 100644 --- a/lib/crewai/src/crewai/files/processing/__init__.py +++ b/lib/crewai-files/src/crewai_files/processing/__init__.py @@ -4,7 +4,7 @@ This module provides validation, transformation, and processing utilities for files used in multimodal LLM interactions. """ -from crewai.files.processing.constraints import ( +from crewai_files.processing.constraints import ( ANTHROPIC_CONSTRAINTS, BEDROCK_CONSTRAINTS, GEMINI_CONSTRAINTS, @@ -16,16 +16,16 @@ from crewai.files.processing.constraints import ( VideoConstraints, get_constraints_for_provider, ) -from crewai.files.processing.enums import FileHandling -from crewai.files.processing.exceptions import ( +from crewai_files.processing.enums import FileHandling +from crewai_files.processing.exceptions import ( FileProcessingError, FileTooLargeError, FileValidationError, ProcessingDependencyError, UnsupportedFileTypeError, ) -from crewai.files.processing.processor import FileProcessor -from crewai.files.processing.validators import ( +from crewai_files.processing.processor import FileProcessor +from crewai_files.processing.validators import ( validate_audio, validate_file, validate_image, diff --git a/lib/crewai/src/crewai/files/processing/constraints.py b/lib/crewai-files/src/crewai_files/processing/constraints.py similarity index 99% rename from lib/crewai/src/crewai/files/processing/constraints.py rename to lib/crewai-files/src/crewai_files/processing/constraints.py index 8d8fba783..e9f68341a 100644 --- a/lib/crewai/src/crewai/files/processing/constraints.py +++ b/lib/crewai-files/src/crewai_files/processing/constraints.py @@ -4,7 +4,7 @@ from dataclasses import dataclass from functools import lru_cache from typing import Literal -from crewai.files.content_types import ( +from crewai_files.core.types import ( AudioMimeType, ImageMimeType, VideoMimeType, diff --git a/lib/crewai/src/crewai/files/processing/enums.py b/lib/crewai-files/src/crewai_files/processing/enums.py similarity index 100% rename from lib/crewai/src/crewai/files/processing/enums.py rename to lib/crewai-files/src/crewai_files/processing/enums.py diff --git a/lib/crewai/src/crewai/files/processing/exceptions.py b/lib/crewai-files/src/crewai_files/processing/exceptions.py similarity index 100% rename from lib/crewai/src/crewai/files/processing/exceptions.py rename to lib/crewai-files/src/crewai_files/processing/exceptions.py diff --git a/lib/crewai/src/crewai/files/processing/processor.py b/lib/crewai-files/src/crewai_files/processing/processor.py similarity index 97% rename from lib/crewai/src/crewai/files/processing/processor.py rename to lib/crewai-files/src/crewai_files/processing/processor.py index 70c665334..afb7fbbde 100644 --- a/lib/crewai/src/crewai/files/processing/processor.py +++ b/lib/crewai-files/src/crewai_files/processing/processor.py @@ -4,7 +4,7 @@ import asyncio from collections.abc import Sequence import logging -from crewai.files.content_types import ( +from crewai_files.core.types import ( AudioFile, File, FileInput, @@ -13,18 +13,18 @@ from crewai.files.content_types import ( TextFile, VideoFile, ) -from crewai.files.processing.constraints import ( +from crewai_files.processing.constraints import ( ProviderConstraints, get_constraints_for_provider, ) -from crewai.files.processing.enums import FileHandling -from crewai.files.processing.exceptions import ( +from crewai_files.processing.enums import FileHandling +from crewai_files.processing.exceptions import ( FileProcessingError, FileTooLargeError, FileValidationError, UnsupportedFileTypeError, ) -from crewai.files.processing.transformers import ( +from crewai_files.processing.transformers import ( chunk_pdf, chunk_text, get_image_dimensions, @@ -32,7 +32,7 @@ from crewai.files.processing.transformers import ( optimize_image, resize_image, ) -from crewai.files.processing.validators import validate_file +from crewai_files.processing.validators import validate_file logger = logging.getLogger(__name__) diff --git a/lib/crewai/src/crewai/files/processing/transformers.py b/lib/crewai-files/src/crewai_files/processing/transformers.py similarity index 98% rename from lib/crewai/src/crewai/files/processing/transformers.py rename to lib/crewai-files/src/crewai_files/processing/transformers.py index cae2384fb..a51f13c92 100644 --- a/lib/crewai/src/crewai/files/processing/transformers.py +++ b/lib/crewai-files/src/crewai_files/processing/transformers.py @@ -4,9 +4,9 @@ from collections.abc import Iterator import io import logging -from crewai.files.content_types import ImageFile, PDFFile, TextFile -from crewai.files.file import FileBytes -from crewai.files.processing.exceptions import ProcessingDependencyError +from crewai_files.core.sources import FileBytes +from crewai_files.core.types import ImageFile, PDFFile, TextFile +from crewai_files.processing.exceptions import ProcessingDependencyError logger = logging.getLogger(__name__) diff --git a/lib/crewai/src/crewai/files/processing/validators.py b/lib/crewai-files/src/crewai_files/processing/validators.py similarity index 99% rename from lib/crewai/src/crewai/files/processing/validators.py rename to lib/crewai-files/src/crewai_files/processing/validators.py index f09f8a659..9f2c94e92 100644 --- a/lib/crewai/src/crewai/files/processing/validators.py +++ b/lib/crewai-files/src/crewai_files/processing/validators.py @@ -4,7 +4,7 @@ from collections.abc import Sequence import io import logging -from crewai.files.content_types import ( +from crewai_files.core.types import ( AudioFile, FileInput, ImageFile, @@ -12,14 +12,14 @@ from crewai.files.content_types import ( TextFile, VideoFile, ) -from crewai.files.processing.constraints import ( +from crewai_files.processing.constraints import ( AudioConstraints, ImageConstraints, PDFConstraints, ProviderConstraints, VideoConstraints, ) -from crewai.files.processing.exceptions import ( +from crewai_files.processing.exceptions import ( FileTooLargeError, FileValidationError, UnsupportedFileTypeError, diff --git a/lib/crewai-files/src/crewai_files/py.typed b/lib/crewai-files/src/crewai_files/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/lib/crewai-files/src/crewai_files/resolution/__init__.py b/lib/crewai-files/src/crewai_files/resolution/__init__.py new file mode 100644 index 000000000..6d6fe4894 --- /dev/null +++ b/lib/crewai-files/src/crewai_files/resolution/__init__.py @@ -0,0 +1,16 @@ +"""File resolution logic.""" + +from crewai_files.resolution.resolver import FileResolver +from crewai_files.resolution.utils import ( + is_file_source, + normalize_input_files, + wrap_file_source, +) + + +__all__ = [ + "FileResolver", + "is_file_source", + "normalize_input_files", + "wrap_file_source", +] diff --git a/lib/crewai/src/crewai/files/resolver.py b/lib/crewai-files/src/crewai_files/resolution/resolver.py similarity index 96% rename from lib/crewai/src/crewai/files/resolver.py rename to lib/crewai-files/src/crewai_files/resolution/resolver.py index 4aa2e4b65..31c54c55a 100644 --- a/lib/crewai/src/crewai/files/resolver.py +++ b/lib/crewai-files/src/crewai_files/resolution/resolver.py @@ -6,11 +6,19 @@ from dataclasses import dataclass, field import hashlib import logging -from crewai.files.constants import UPLOAD_MAX_RETRIES, UPLOAD_RETRY_DELAY_BASE -from crewai.files.content_types import FileInput -from crewai.files.file import FileUrl -from crewai.files.metrics import measure_operation -from crewai.files.processing.constraints import ( +from crewai_files.cache.metrics import measure_operation +from crewai_files.cache.upload_cache import CachedUpload, UploadCache +from crewai_files.core.constants import UPLOAD_MAX_RETRIES, UPLOAD_RETRY_DELAY_BASE +from crewai_files.core.resolved import ( + FileReference, + InlineBase64, + InlineBytes, + ResolvedFile, + UrlReference, +) +from crewai_files.core.sources import FileUrl +from crewai_files.core.types import FileInput +from crewai_files.processing.constraints import ( AudioConstraints, ImageConstraints, PDFConstraints, @@ -18,17 +26,9 @@ from crewai.files.processing.constraints import ( VideoConstraints, get_constraints_for_provider, ) -from crewai.files.resolved import ( - FileReference, - InlineBase64, - InlineBytes, - ResolvedFile, - UrlReference, -) -from crewai.files.upload_cache import CachedUpload, UploadCache -from crewai.files.uploaders import UploadResult, get_uploader -from crewai.files.uploaders.base import FileUploader -from crewai.files.uploaders.factory import ProviderType +from crewai_files.uploaders import UploadResult, get_uploader +from crewai_files.uploaders.base import FileUploader +from crewai_files.uploaders.factory import ProviderType logger = logging.getLogger(__name__) @@ -340,7 +340,7 @@ class FileResolver: """ import time - from crewai.files.processing.exceptions import ( + from crewai_files.processing.exceptions import ( PermanentUploadError, TransientUploadError, ) @@ -555,7 +555,7 @@ class FileResolver: Returns: UploadResult if successful, None otherwise. """ - from crewai.files.processing.exceptions import ( + from crewai_files.processing.exceptions import ( PermanentUploadError, TransientUploadError, ) @@ -617,7 +617,7 @@ class FileResolver: return self._uploaders.get(provider) - def get_cached_uploads(self, provider: str) -> list[CachedUpload]: + def get_cached_uploads(self, provider: ProviderType) -> list[CachedUpload]: """Get all cached uploads for a provider. Args: diff --git a/lib/crewai/src/crewai/files/utils.py b/lib/crewai-files/src/crewai_files/resolution/utils.py similarity index 79% rename from lib/crewai/src/crewai/files/utils.py rename to lib/crewai-files/src/crewai_files/resolution/utils.py index 2f424bf64..d5251ff09 100644 --- a/lib/crewai/src/crewai/files/utils.py +++ b/lib/crewai-files/src/crewai_files/resolution/utils.py @@ -5,19 +5,15 @@ from __future__ import annotations from pathlib import Path from typing import TYPE_CHECKING -from typing_extensions import TypeIs +from crewai_files.core.sources import is_file_source if TYPE_CHECKING: - from crewai.files.content_types import FileInput - from crewai.files.file import FileSource, FileSourceInput + from crewai_files.core.sources import FileSource, FileSourceInput + from crewai_files.core.types import FileInput -def is_file_source(v: object) -> TypeIs[FileSource]: - """Type guard to narrow input to FileSource.""" - from crewai.files.file import FileBytes, FilePath, FileStream, FileUrl - - return isinstance(v, (FilePath, FileBytes, FileStream, FileUrl)) +__all__ = ["is_file_source", "normalize_input_files", "wrap_file_source"] def wrap_file_source(source: FileSource) -> FileInput: @@ -29,7 +25,7 @@ def wrap_file_source(source: FileSource) -> FileInput: Returns: Typed FileInput wrapper based on content type. """ - from crewai.files.content_types import ( + from crewai_files.core.types import ( AudioFile, ImageFile, PDFFile, @@ -61,8 +57,8 @@ def normalize_input_files( Returns: Dictionary mapping names to FileInput wrappers. """ - from crewai.files.content_types import BaseFile - from crewai.files.file import FileBytes, FilePath, FileStream, FileUrl + from crewai_files.core.sources import FileBytes, FilePath, FileStream, FileUrl + from crewai_files.core.types import BaseFile result: dict[str, FileInput] = {} diff --git a/lib/crewai/src/crewai/files/uploaders/__init__.py b/lib/crewai-files/src/crewai_files/uploaders/__init__.py similarity index 53% rename from lib/crewai/src/crewai/files/uploaders/__init__.py rename to lib/crewai-files/src/crewai_files/uploaders/__init__.py index d3664e71a..7deafcd4a 100644 --- a/lib/crewai/src/crewai/files/uploaders/__init__.py +++ b/lib/crewai-files/src/crewai_files/uploaders/__init__.py @@ -1,7 +1,7 @@ """File uploader implementations for provider File APIs.""" -from crewai.files.uploaders.base import FileUploader, UploadResult -from crewai.files.uploaders.factory import get_uploader +from crewai_files.uploaders.base import FileUploader, UploadResult +from crewai_files.uploaders.factory import get_uploader __all__ = [ diff --git a/lib/crewai/src/crewai/files/uploaders/anthropic.py b/lib/crewai-files/src/crewai_files/uploaders/anthropic.py similarity index 97% rename from lib/crewai/src/crewai/files/uploaders/anthropic.py rename to lib/crewai-files/src/crewai_files/uploaders/anthropic.py index bf778c53f..9a975e64e 100644 --- a/lib/crewai/src/crewai/files/uploaders/anthropic.py +++ b/lib/crewai-files/src/crewai_files/uploaders/anthropic.py @@ -7,9 +7,9 @@ import logging import os from typing import Any -from crewai.files.content_types import FileInput -from crewai.files.processing.exceptions import classify_upload_error -from crewai.files.uploaders.base import FileUploader, UploadResult +from crewai_files.core.types import FileInput +from crewai_files.processing.exceptions import classify_upload_error +from crewai_files.uploaders.base import FileUploader, UploadResult logger = logging.getLogger(__name__) diff --git a/lib/crewai/src/crewai/files/uploaders/base.py b/lib/crewai-files/src/crewai_files/uploaders/base.py similarity index 98% rename from lib/crewai/src/crewai/files/uploaders/base.py rename to lib/crewai-files/src/crewai_files/uploaders/base.py index 83b02f52a..6df1695e8 100644 --- a/lib/crewai/src/crewai/files/uploaders/base.py +++ b/lib/crewai-files/src/crewai_files/uploaders/base.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from datetime import datetime from typing import Any -from crewai.files.content_types import FileInput +from crewai_files.core.types import FileInput @dataclass diff --git a/lib/crewai/src/crewai/files/uploaders/bedrock.py b/lib/crewai-files/src/crewai_files/uploaders/bedrock.py similarity index 98% rename from lib/crewai/src/crewai/files/uploaders/bedrock.py rename to lib/crewai-files/src/crewai_files/uploaders/bedrock.py index c887cae89..e050df7e5 100644 --- a/lib/crewai/src/crewai/files/uploaders/bedrock.py +++ b/lib/crewai-files/src/crewai_files/uploaders/bedrock.py @@ -8,18 +8,18 @@ import os from pathlib import Path from typing import Any -from crewai.files.constants import ( +from crewai_files.core.constants import ( MAX_CONCURRENCY, MULTIPART_CHUNKSIZE, MULTIPART_THRESHOLD, ) -from crewai.files.content_types import FileInput -from crewai.files.file import FileBytes, FilePath -from crewai.files.processing.exceptions import ( +from crewai_files.core.sources import FileBytes, FilePath +from crewai_files.core.types import FileInput +from crewai_files.processing.exceptions import ( PermanentUploadError, TransientUploadError, ) -from crewai.files.uploaders.base import FileUploader, UploadResult +from crewai_files.uploaders.base import FileUploader, UploadResult logger = logging.getLogger(__name__) diff --git a/lib/crewai/src/crewai/files/uploaders/factory.py b/lib/crewai-files/src/crewai_files/uploaders/factory.py similarity index 87% rename from lib/crewai/src/crewai/files/uploaders/factory.py rename to lib/crewai-files/src/crewai_files/uploaders/factory.py index b6ae6b963..9407d6dd2 100644 --- a/lib/crewai/src/crewai/files/uploaders/factory.py +++ b/lib/crewai-files/src/crewai_files/uploaders/factory.py @@ -7,10 +7,10 @@ from typing import Literal, TypeAlias, TypedDict, overload from typing_extensions import NotRequired, Unpack -from crewai.files.uploaders.anthropic import AnthropicFileUploader -from crewai.files.uploaders.bedrock import BedrockFileUploader -from crewai.files.uploaders.gemini import GeminiFileUploader -from crewai.files.uploaders.openai import OpenAIFileUploader +from crewai_files.uploaders.anthropic import AnthropicFileUploader +from crewai_files.uploaders.bedrock import BedrockFileUploader +from crewai_files.uploaders.gemini import GeminiFileUploader +from crewai_files.uploaders.openai import OpenAIFileUploader logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ FileUploaderType: TypeAlias = ( GeminiProviderType = Literal["gemini", "google"] AnthropicProviderType = Literal["anthropic", "claude"] -OpenAIProviderType = Literal["openai", "gpt"] +OpenAIProviderType = Literal["openai", "gpt", "azure"] BedrockProviderType = Literal["bedrock", "aws"] ProviderType: TypeAlias = ( @@ -131,7 +131,7 @@ def get_uploader( if "gemini" in provider_lower or "google" in provider_lower: try: - from crewai.files.uploaders.gemini import GeminiFileUploader + from crewai_files.uploaders.gemini import GeminiFileUploader return GeminiFileUploader(api_key=kwargs.get("api_key")) except ImportError: @@ -142,7 +142,7 @@ def get_uploader( if "anthropic" in provider_lower or "claude" in provider_lower: try: - from crewai.files.uploaders.anthropic import AnthropicFileUploader + from crewai_files.uploaders.anthropic import AnthropicFileUploader return AnthropicFileUploader(api_key=kwargs.get("api_key")) except ImportError: @@ -151,9 +151,13 @@ def get_uploader( ) raise - if "openai" in provider_lower or "gpt" in provider_lower: + if ( + "openai" in provider_lower + or "gpt" in provider_lower + or "azure" in provider_lower + ): try: - from crewai.files.uploaders.openai import OpenAIFileUploader + from crewai_files.uploaders.openai import OpenAIFileUploader return OpenAIFileUploader( api_key=kwargs.get("api_key"), @@ -176,7 +180,7 @@ def get_uploader( ) raise try: - from crewai.files.uploaders.bedrock import BedrockFileUploader + from crewai_files.uploaders.bedrock import BedrockFileUploader return BedrockFileUploader( bucket_name=kwargs.get("bucket_name"), diff --git a/lib/crewai/src/crewai/files/uploaders/gemini.py b/lib/crewai-files/src/crewai_files/uploaders/gemini.py similarity index 98% rename from lib/crewai/src/crewai/files/uploaders/gemini.py rename to lib/crewai-files/src/crewai_files/uploaders/gemini.py index ba171692e..e563093ee 100644 --- a/lib/crewai/src/crewai/files/uploaders/gemini.py +++ b/lib/crewai-files/src/crewai_files/uploaders/gemini.py @@ -12,20 +12,20 @@ import random import time from typing import Any -from crewai.files.constants import ( +from crewai_files.core.constants import ( BACKOFF_BASE_DELAY, BACKOFF_JITTER_FACTOR, BACKOFF_MAX_DELAY, GEMINI_FILE_TTL, ) -from crewai.files.content_types import FileInput -from crewai.files.file import FilePath -from crewai.files.processing.exceptions import ( +from crewai_files.core.sources import FilePath +from crewai_files.core.types import FileInput +from crewai_files.processing.exceptions import ( PermanentUploadError, TransientUploadError, classify_upload_error, ) -from crewai.files.uploaders.base import FileUploader, UploadResult +from crewai_files.uploaders.base import FileUploader, UploadResult logger = logging.getLogger(__name__) diff --git a/lib/crewai/src/crewai/files/uploaders/openai.py b/lib/crewai-files/src/crewai_files/uploaders/openai.py similarity index 98% rename from lib/crewai/src/crewai/files/uploaders/openai.py rename to lib/crewai-files/src/crewai_files/uploaders/openai.py index f8893dc01..ec18fcc43 100644 --- a/lib/crewai/src/crewai/files/uploaders/openai.py +++ b/lib/crewai-files/src/crewai_files/uploaders/openai.py @@ -8,15 +8,15 @@ import logging import os from typing import Any -from crewai.files.constants import DEFAULT_UPLOAD_CHUNK_SIZE, FILES_API_MAX_SIZE -from crewai.files.content_types import FileInput -from crewai.files.file import FileBytes, FilePath, FileStream -from crewai.files.processing.exceptions import ( +from crewai_files.core.constants import DEFAULT_UPLOAD_CHUNK_SIZE, FILES_API_MAX_SIZE +from crewai_files.core.sources import FileBytes, FilePath, FileStream +from crewai_files.core.types import FileInput +from crewai_files.processing.exceptions import ( PermanentUploadError, TransientUploadError, classify_upload_error, ) -from crewai.files.uploaders.base import FileUploader, UploadResult +from crewai_files.uploaders.base import FileUploader, UploadResult logger = logging.getLogger(__name__) diff --git a/lib/crewai-files/tests/processing/__init__.py b/lib/crewai-files/tests/processing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/crewai/tests/files/processing/test_constraints.py b/lib/crewai-files/tests/processing/test_constraints.py similarity index 99% rename from lib/crewai/tests/files/processing/test_constraints.py rename to lib/crewai-files/tests/processing/test_constraints.py index e23434829..c90dc760e 100644 --- a/lib/crewai/tests/files/processing/test_constraints.py +++ b/lib/crewai-files/tests/processing/test_constraints.py @@ -1,8 +1,6 @@ """Tests for provider constraints.""" -import pytest - -from crewai.files.processing.constraints import ( +from crewai_files.processing.constraints import ( ANTHROPIC_CONSTRAINTS, BEDROCK_CONSTRAINTS, GEMINI_CONSTRAINTS, @@ -14,6 +12,7 @@ from crewai.files.processing.constraints import ( VideoConstraints, get_constraints_for_provider, ) +import pytest class TestImageConstraints: diff --git a/lib/crewai/tests/files/processing/test_processor.py b/lib/crewai-files/tests/processing/test_processor.py similarity index 72% rename from lib/crewai/tests/files/processing/test_processor.py rename to lib/crewai-files/tests/processing/test_processor.py index 2454a44f3..1648b6aeb 100644 --- a/lib/crewai/tests/files/processing/test_processor.py +++ b/lib/crewai-files/tests/processing/test_processor.py @@ -1,32 +1,99 @@ """Tests for FileProcessor class.""" -import pytest - -from crewai.files import FileBytes, ImageFile, PDFFile, TextFile -from crewai.files.processing.constraints import ( +from crewai_files import FileBytes, ImageFile +from crewai_files.processing.constraints import ( ANTHROPIC_CONSTRAINTS, ImageConstraints, - PDFConstraints, ProviderConstraints, ) -from crewai.files.processing.enums import FileHandling -from crewai.files.processing.exceptions import ( +from crewai_files.processing.enums import FileHandling +from crewai_files.processing.exceptions import ( FileTooLargeError, - FileValidationError, ) -from crewai.files.processing.processor import FileProcessor +from crewai_files.processing.processor import FileProcessor +import pytest # Minimal valid PNG: 8x8 pixel RGB image (valid for PIL) -MINIMAL_PNG = bytes([ - 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, - 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, - 0x08, 0x02, 0x00, 0x00, 0x00, 0x4b, 0x6d, 0x29, 0xdc, 0x00, 0x00, 0x00, - 0x12, 0x49, 0x44, 0x41, 0x54, 0x78, 0x9c, 0x63, 0xfc, 0xcf, 0x80, 0x1d, - 0x30, 0xe1, 0x10, 0x1f, 0xa4, 0x12, 0x00, 0xcd, 0x41, 0x01, 0x0f, 0xe8, - 0x41, 0xe2, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, - 0x42, 0x60, 0x82, -]) +MINIMAL_PNG = bytes( + [ + 0x89, + 0x50, + 0x4E, + 0x47, + 0x0D, + 0x0A, + 0x1A, + 0x0A, + 0x00, + 0x00, + 0x00, + 0x0D, + 0x49, + 0x48, + 0x44, + 0x52, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x08, + 0x08, + 0x02, + 0x00, + 0x00, + 0x00, + 0x4B, + 0x6D, + 0x29, + 0xDC, + 0x00, + 0x00, + 0x00, + 0x12, + 0x49, + 0x44, + 0x41, + 0x54, + 0x78, + 0x9C, + 0x63, + 0xFC, + 0xCF, + 0x80, + 0x1D, + 0x30, + 0xE1, + 0x10, + 0x1F, + 0xA4, + 0x12, + 0x00, + 0xCD, + 0x41, + 0x01, + 0x0F, + 0xE8, + 0x41, + 0xE2, + 0x6F, + 0x00, + 0x00, + 0x00, + 0x00, + 0x49, + 0x45, + 0x4E, + 0x44, + 0xAE, + 0x42, + 0x60, + 0x82, + ] +) # Minimal valid PDF MINIMAL_PDF = ( @@ -96,7 +163,9 @@ class TestFileProcessorValidate: ) processor = FileProcessor(constraints=constraints) # Set mode to strict on the file - file = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict") + file = ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict" + ) with pytest.raises(FileTooLargeError): processor.validate(file) @@ -131,7 +200,9 @@ class TestFileProcessorProcess: ) processor = FileProcessor(constraints=constraints) # Set mode to strict on the file - file = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict") + file = ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict" + ) with pytest.raises(FileTooLargeError): processor.process(file) @@ -144,7 +215,9 @@ class TestFileProcessorProcess: ) processor = FileProcessor(constraints=constraints) # Set mode to warn on the file - file = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="warn") + file = ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="warn" + ) result = processor.process(file) @@ -158,8 +231,12 @@ class TestFileProcessorProcessFiles: """Test processing multiple files.""" processor = FileProcessor(constraints=ANTHROPIC_CONSTRAINTS) files = { - "image1": ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test1.png")), - "image2": ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test2.png")), + "image1": ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test1.png") + ), + "image2": ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test2.png") + ), } result = processor.process_files(files) @@ -198,7 +275,9 @@ class TestFileProcessorPerFileMode: def test_file_custom_mode(self): """Test setting custom mode on file.""" - file = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict") + file = ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict" + ) assert file.mode == "strict" def test_processor_respects_file_mode(self): @@ -210,11 +289,15 @@ class TestFileProcessorPerFileMode: processor = FileProcessor(constraints=constraints) # File with strict mode should raise - strict_file = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict") + strict_file = ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="strict" + ) with pytest.raises(FileTooLargeError): processor.process(strict_file) # File with warn mode should not raise - warn_file = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="warn") + warn_file = ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test.png"), mode="warn" + ) result = processor.process(warn_file) - assert result == warn_file \ No newline at end of file + assert result == warn_file diff --git a/lib/crewai/tests/files/processing/test_transformers.py b/lib/crewai-files/tests/processing/test_transformers.py similarity index 94% rename from lib/crewai/tests/files/processing/test_transformers.py rename to lib/crewai-files/tests/processing/test_transformers.py index c40cd412f..1fa28c7fa 100644 --- a/lib/crewai/tests/files/processing/test_transformers.py +++ b/lib/crewai-files/tests/processing/test_transformers.py @@ -1,14 +1,12 @@ """Unit tests for file transformers.""" import io -from unittest.mock import MagicMock, patch +from unittest.mock import patch -import pytest - -from crewai.files import ImageFile, PDFFile, TextFile -from crewai.files.file import FileBytes -from crewai.files.processing.exceptions import ProcessingDependencyError -from crewai.files.processing.transformers import ( +from crewai_files import ImageFile, PDFFile, TextFile +from crewai_files.core.sources import FileBytes +from crewai_files.processing.exceptions import ProcessingDependencyError +from crewai_files.processing.transformers import ( chunk_pdf, chunk_text, get_image_dimensions, @@ -16,6 +14,7 @@ from crewai.files.processing.transformers import ( optimize_image, resize_image, ) +import pytest def create_test_png(width: int = 100, height: int = 100) -> bytes: @@ -112,7 +111,7 @@ class TestResizeImage: # Force reimport to trigger ImportError import importlib - import crewai.files.processing.transformers as t + import crewai_files.processing.transformers as t importlib.reload(t) t.resize_image(img, 100, 100) @@ -267,14 +266,18 @@ class TestChunkText: content = "Line one\nLine two\nLine three\nLine four\nLine five" text = TextFile(source=content.encode(), filename="lines.txt") - result = list(chunk_text(text, max_chars=25, overlap_chars=0, split_on_newlines=True)) + result = list( + chunk_text(text, max_chars=25, overlap_chars=0, split_on_newlines=True) + ) # Should split at newline boundaries for chunk in result: chunk_text_content = chunk.read().decode() # Chunks should end at newlines (except possibly the last) if chunk != result[-1]: - assert chunk_text_content.endswith("\n") or len(chunk_text_content) <= 25 + assert ( + chunk_text_content.endswith("\n") or len(chunk_text_content) <= 25 + ) def test_chunk_with_overlap(self) -> None: """Test chunking with overlapping characters.""" @@ -321,7 +324,7 @@ class TestGetImageDimensions: def test_returns_none_without_pillow(self) -> None: """Test that None is returned when Pillow is not installed.""" png_bytes = create_test_png(100, 100) - img = ImageFile(source=FileBytes(data=png_bytes, filename="test.png")) + ImageFile(source=FileBytes(data=png_bytes, filename="test.png")) with patch.dict("sys.modules", {"PIL": None}): # Can't easily test this without unloading module @@ -356,4 +359,4 @@ class TestGetPdfPageCount: count = get_pdf_page_count(pdf) - assert count is None \ No newline at end of file + assert count is None diff --git a/lib/crewai/tests/files/processing/test_validators.py b/lib/crewai-files/tests/processing/test_validators.py similarity index 91% rename from lib/crewai/tests/files/processing/test_validators.py rename to lib/crewai-files/tests/processing/test_validators.py index 4be47e1d7..98e92a90f 100644 --- a/lib/crewai/tests/files/processing/test_validators.py +++ b/lib/crewai-files/tests/processing/test_validators.py @@ -2,10 +2,8 @@ from unittest.mock import patch -import pytest - -from crewai.files import AudioFile, FileBytes, ImageFile, PDFFile, TextFile, VideoFile -from crewai.files.processing.constraints import ( +from crewai_files import AudioFile, FileBytes, ImageFile, PDFFile, TextFile, VideoFile +from crewai_files.processing.constraints import ( ANTHROPIC_CONSTRAINTS, AudioConstraints, ImageConstraints, @@ -13,12 +11,12 @@ from crewai.files.processing.constraints import ( ProviderConstraints, VideoConstraints, ) -from crewai.files.processing.exceptions import ( +from crewai_files.processing.exceptions import ( FileTooLargeError, FileValidationError, UnsupportedFileTypeError, ) -from crewai.files.processing.validators import ( +from crewai_files.processing.validators import ( _get_audio_duration, _get_video_duration, validate_audio, @@ -28,18 +26,89 @@ from crewai.files.processing.validators import ( validate_text, validate_video, ) +import pytest # Minimal valid PNG: 8x8 pixel RGB image (valid for PIL) -MINIMAL_PNG = bytes([ - 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, - 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, - 0x08, 0x02, 0x00, 0x00, 0x00, 0x4b, 0x6d, 0x29, 0xdc, 0x00, 0x00, 0x00, - 0x12, 0x49, 0x44, 0x41, 0x54, 0x78, 0x9c, 0x63, 0xfc, 0xcf, 0x80, 0x1d, - 0x30, 0xe1, 0x10, 0x1f, 0xa4, 0x12, 0x00, 0xcd, 0x41, 0x01, 0x0f, 0xe8, - 0x41, 0xe2, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, - 0x42, 0x60, 0x82, -]) +MINIMAL_PNG = bytes( + [ + 0x89, + 0x50, + 0x4E, + 0x47, + 0x0D, + 0x0A, + 0x1A, + 0x0A, + 0x00, + 0x00, + 0x00, + 0x0D, + 0x49, + 0x48, + 0x44, + 0x52, + 0x00, + 0x00, + 0x00, + 0x08, + 0x00, + 0x00, + 0x00, + 0x08, + 0x08, + 0x02, + 0x00, + 0x00, + 0x00, + 0x4B, + 0x6D, + 0x29, + 0xDC, + 0x00, + 0x00, + 0x00, + 0x12, + 0x49, + 0x44, + 0x41, + 0x54, + 0x78, + 0x9C, + 0x63, + 0xFC, + 0xCF, + 0x80, + 0x1D, + 0x30, + 0xE1, + 0x10, + 0x1F, + 0xA4, + 0x12, + 0x00, + 0xCD, + 0x41, + 0x01, + 0x0F, + 0xE8, + 0x41, + 0xE2, + 0x6F, + 0x00, + 0x00, + 0x00, + 0x00, + 0x49, + 0x45, + 0x4E, + 0x44, + 0xAE, + 0x42, + 0x60, + 0x82, + ] +) # Minimal valid PDF MINIMAL_PDF = ( @@ -268,7 +337,7 @@ class TestValidateAudio: assert "not supported" in str(exc_info.value) - @patch("crewai.files.processing.validators._get_audio_duration") + @patch("crewai_files.processing.validators._get_audio_duration") def test_validate_audio_duration_passes(self, mock_get_duration): """Test validating audio when duration is under limit.""" mock_get_duration.return_value = 30.0 @@ -284,7 +353,7 @@ class TestValidateAudio: assert len(errors) == 0 mock_get_duration.assert_called_once() - @patch("crewai.files.processing.validators._get_audio_duration") + @patch("crewai_files.processing.validators._get_audio_duration") def test_validate_audio_duration_fails(self, mock_get_duration): """Test validating audio when duration exceeds limit.""" mock_get_duration.return_value = 120.5 @@ -302,7 +371,7 @@ class TestValidateAudio: assert "120.5s" in str(exc_info.value) assert "60s" in str(exc_info.value) - @patch("crewai.files.processing.validators._get_audio_duration") + @patch("crewai_files.processing.validators._get_audio_duration") def test_validate_audio_duration_no_raise(self, mock_get_duration): """Test audio duration validation with raise_on_error=False.""" mock_get_duration.return_value = 120.5 @@ -318,7 +387,7 @@ class TestValidateAudio: assert len(errors) == 1 assert "duration" in errors[0].lower() - @patch("crewai.files.processing.validators._get_audio_duration") + @patch("crewai_files.processing.validators._get_audio_duration") def test_validate_audio_duration_none_skips(self, mock_get_duration): """Test that duration validation is skipped when max_duration_seconds is None.""" constraints = AudioConstraints( @@ -333,7 +402,7 @@ class TestValidateAudio: assert len(errors) == 0 mock_get_duration.assert_not_called() - @patch("crewai.files.processing.validators._get_audio_duration") + @patch("crewai_files.processing.validators._get_audio_duration") def test_validate_audio_duration_detection_returns_none(self, mock_get_duration): """Test that validation passes when duration detection returns None.""" mock_get_duration.return_value = None @@ -391,7 +460,7 @@ class TestValidateVideo: assert "not supported" in str(exc_info.value) - @patch("crewai.files.processing.validators._get_video_duration") + @patch("crewai_files.processing.validators._get_video_duration") def test_validate_video_duration_passes(self, mock_get_duration): """Test validating video when duration is under limit.""" mock_get_duration.return_value = 30.0 @@ -407,7 +476,7 @@ class TestValidateVideo: assert len(errors) == 0 mock_get_duration.assert_called_once() - @patch("crewai.files.processing.validators._get_video_duration") + @patch("crewai_files.processing.validators._get_video_duration") def test_validate_video_duration_fails(self, mock_get_duration): """Test validating video when duration exceeds limit.""" mock_get_duration.return_value = 180.0 @@ -425,7 +494,7 @@ class TestValidateVideo: assert "180.0s" in str(exc_info.value) assert "60s" in str(exc_info.value) - @patch("crewai.files.processing.validators._get_video_duration") + @patch("crewai_files.processing.validators._get_video_duration") def test_validate_video_duration_no_raise(self, mock_get_duration): """Test video duration validation with raise_on_error=False.""" mock_get_duration.return_value = 180.0 @@ -441,7 +510,7 @@ class TestValidateVideo: assert len(errors) == 1 assert "duration" in errors[0].lower() - @patch("crewai.files.processing.validators._get_video_duration") + @patch("crewai_files.processing.validators._get_video_duration") def test_validate_video_duration_none_skips(self, mock_get_duration): """Test that duration validation is skipped when max_duration_seconds is None.""" constraints = VideoConstraints( @@ -456,7 +525,7 @@ class TestValidateVideo: assert len(errors) == 0 mock_get_duration.assert_not_called() - @patch("crewai.files.processing.validators._get_video_duration") + @patch("crewai_files.processing.validators._get_video_duration") def test_validate_video_duration_detection_returns_none(self, mock_get_duration): """Test that validation passes when duration detection returns None.""" mock_get_duration.return_value = None diff --git a/lib/crewai/tests/files/test_file_url.py b/lib/crewai-files/tests/test_file_url.py similarity index 97% rename from lib/crewai/tests/files/test_file_url.py rename to lib/crewai-files/tests/test_file_url.py index 1f862bbee..7885723e6 100644 --- a/lib/crewai/tests/files/test_file_url.py +++ b/lib/crewai-files/tests/test_file_url.py @@ -2,13 +2,12 @@ from unittest.mock import AsyncMock, MagicMock, patch +from crewai_files import FileBytes, FileUrl, ImageFile +from crewai_files.core.resolved import InlineBase64, UrlReference +from crewai_files.core.sources import FilePath, _normalize_source +from crewai_files.resolution.resolver import FileResolver import pytest -from crewai.files import FileBytes, FileUrl, ImageFile -from crewai.files.file import _normalize_source, FilePath -from crewai.files.resolved import InlineBase64, UrlReference -from crewai.files.resolver import FileResolver - class TestFileUrl: """Tests for FileUrl source type.""" diff --git a/lib/crewai/tests/files/test_resolved.py b/lib/crewai-files/tests/test_resolved.py similarity index 98% rename from lib/crewai/tests/files/test_resolved.py rename to lib/crewai-files/tests/test_resolved.py index d5101d2a1..6cad1b5a6 100644 --- a/lib/crewai/tests/files/test_resolved.py +++ b/lib/crewai-files/tests/test_resolved.py @@ -2,15 +2,14 @@ from datetime import datetime, timezone -import pytest - -from crewai.files.resolved import ( +from crewai_files.core.resolved import ( FileReference, InlineBase64, InlineBytes, ResolvedFile, UrlReference, ) +import pytest class TestInlineBase64: diff --git a/lib/crewai/tests/files/test_resolver.py b/lib/crewai-files/tests/test_resolver.py similarity index 92% rename from lib/crewai/tests/files/test_resolver.py rename to lib/crewai-files/tests/test_resolver.py index dcc890848..095eb4329 100644 --- a/lib/crewai/tests/files/test_resolver.py +++ b/lib/crewai-files/tests/test_resolver.py @@ -1,15 +1,13 @@ """Tests for FileResolver.""" -import pytest - -from crewai.files import FileBytes, ImageFile -from crewai.files.resolved import InlineBase64, InlineBytes -from crewai.files.resolver import ( +from crewai_files import FileBytes, ImageFile +from crewai_files.cache.upload_cache import UploadCache +from crewai_files.core.resolved import InlineBase64, InlineBytes +from crewai_files.resolution.resolver import ( FileResolver, FileResolverConfig, create_resolver, ) -from crewai.files.upload_cache import UploadCache # Minimal valid PNG @@ -74,8 +72,12 @@ class TestFileResolver: """Test resolving multiple files.""" resolver = FileResolver() files = { - "image1": ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test1.png")), - "image2": ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test2.png")), + "image1": ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test1.png") + ), + "image2": ImageFile( + source=FileBytes(data=MINIMAL_PNG, filename="test2.png") + ), } resolved = resolver.resolve_files(files, "openai") diff --git a/lib/crewai/tests/files/test_upload_cache.py b/lib/crewai-files/tests/test_upload_cache.py similarity index 93% rename from lib/crewai/tests/files/test_upload_cache.py rename to lib/crewai-files/tests/test_upload_cache.py index 7dd92268c..5b2bb6a47 100644 --- a/lib/crewai/tests/files/test_upload_cache.py +++ b/lib/crewai-files/tests/test_upload_cache.py @@ -2,10 +2,8 @@ from datetime import datetime, timedelta, timezone -import pytest - -from crewai.files import FileBytes, ImageFile -from crewai.files.upload_cache import CachedUpload, UploadCache +from crewai_files import FileBytes, ImageFile +from crewai_files.cache.upload_cache import CachedUpload, UploadCache # Minimal valid PNG @@ -92,7 +90,7 @@ class TestUploadCache: cache = UploadCache() file = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test.png")) - cached = cache.set( + cache.set( file=file, provider="gemini", file_id="file-123", @@ -160,7 +158,9 @@ class TestUploadCache: """Test clearing expired entries.""" cache = UploadCache() file1 = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test1.png")) - file2 = ImageFile(source=FileBytes(data=MINIMAL_PNG + b"x", filename="test2.png")) + file2 = ImageFile( + source=FileBytes(data=MINIMAL_PNG + b"x", filename="test2.png") + ) # Add one expired and one valid entry past = datetime.now(timezone.utc) - timedelta(hours=1) @@ -192,8 +192,12 @@ class TestUploadCache: """Test getting all cached uploads for a provider.""" cache = UploadCache() file1 = ImageFile(source=FileBytes(data=MINIMAL_PNG, filename="test1.png")) - file2 = ImageFile(source=FileBytes(data=MINIMAL_PNG + b"x", filename="test2.png")) - file3 = ImageFile(source=FileBytes(data=MINIMAL_PNG + b"xx", filename="test3.png")) + file2 = ImageFile( + source=FileBytes(data=MINIMAL_PNG + b"x", filename="test2.png") + ) + file3 = ImageFile( + source=FileBytes(data=MINIMAL_PNG + b"xx", filename="test3.png") + ) cache.set(file=file1, provider="gemini", file_id="file-1") cache.set(file=file2, provider="gemini", file_id="file-2") diff --git a/lib/crewai/pyproject.toml b/lib/crewai/pyproject.toml index f92be5ac7..ed2092381 100644 --- a/lib/crewai/pyproject.toml +++ b/lib/crewai/pyproject.toml @@ -99,13 +99,7 @@ a2a = [ "aiocache[redis,memcached]~=0.12.3", ] file-processing = [ - "Pillow~=10.4.0", - "pypdf~=4.0.0", - "python-magic>=0.4.27", - "aiocache~=0.12.3", - "aiofiles~=24.1.0", - "tinytag~=1.10.0", - "av~=13.0.0", + "crewai-files", ] @@ -133,6 +127,7 @@ torchvision = [ { index = "pytorch-nightly", marker = "python_version >= '3.13'" }, { index = "pytorch", marker = "python_version < '3.13'" }, ] +crewai-files = { workspace = true } [build-system] diff --git a/lib/crewai/src/crewai/__init__.py b/lib/crewai/src/crewai/__init__.py index e3d53bc9f..3976e89df 100644 --- a/lib/crewai/src/crewai/__init__.py +++ b/lib/crewai/src/crewai/__init__.py @@ -3,10 +3,7 @@ from typing import Any import urllib.request import warnings -from crewai.agent.core import Agent -from crewai.crew import Crew -from crewai.crews.crew_output import CrewOutput -from crewai.files import ( +from crewai_files import ( AudioFile, File, ImageFile, @@ -14,6 +11,10 @@ from crewai.files import ( TextFile, VideoFile, ) + +from crewai.agent.core import Agent +from crewai.crew import Crew +from crewai.crews.crew_output import CrewOutput from crewai.flow.flow import Flow from crewai.knowledge.knowledge import Knowledge from crewai.llm import LLM diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index 4ac9a96af..d72026abe 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -10,6 +10,7 @@ from collections.abc import Callable import logging from typing import TYPE_CHECKING, Any, Literal, cast +from crewai_files import FileProcessor from pydantic import BaseModel, GetCoreSchemaHandler, ValidationError from pydantic_core import CoreSchema, core_schema @@ -24,7 +25,6 @@ from crewai.events.types.logging_events import ( AgentLogsExecutionEvent, AgentLogsStartedEvent, ) -from crewai.files import FileProcessor from crewai.hooks.llm_hooks import ( get_after_llm_call_hooks, get_before_llm_call_hooks, @@ -238,7 +238,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): processor = FileProcessor(constraints=provider) files = processor.process_files(files) - from crewai.files import get_upload_cache + from crewai_files import get_upload_cache upload_cache = get_upload_cache() content_blocks = self.llm.format_multimodal_content( @@ -280,7 +280,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): processor = FileProcessor(constraints=provider) files = await processor.aprocess_files(files) - from crewai.files import get_upload_cache + from crewai_files import get_upload_cache upload_cache = get_upload_cache() content_blocks = await self.llm.aformat_multimodal_content( diff --git a/lib/crewai/src/crewai/crews/utils.py b/lib/crewai/src/crewai/crews/utils.py index f7f955886..897b56827 100644 --- a/lib/crewai/src/crewai/crews/utils.py +++ b/lib/crewai/src/crewai/crews/utils.py @@ -6,15 +6,16 @@ import asyncio from collections.abc import Callable, Coroutine, Iterable from typing import TYPE_CHECKING, Any -from crewai.agents.agent_builder.base_agent import BaseAgent -from crewai.crews.crew_output import CrewOutput -from crewai.files import ( +from crewai_files import ( AudioFile, ImageFile, PDFFile, TextFile, VideoFile, ) + +from crewai.agents.agent_builder.base_agent import BaseAgent +from crewai.crews.crew_output import CrewOutput from crewai.rag.embeddings.types import EmbedderConfig from crewai.types.streaming import CrewStreamingOutput, FlowStreamingOutput from crewai.utilities.file_store import store_files diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 9a6314126..a17a8c08b 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -53,6 +53,7 @@ from crewai.utilities.logger_utils import suppress_warnings if TYPE_CHECKING: + from crewai_files import FileInput, UploadCache from litellm.exceptions import ContextWindowExceededError from litellm.litellm_core_utils.get_supported_openai_params import ( get_supported_openai_params, @@ -66,7 +67,6 @@ if TYPE_CHECKING: from litellm.utils import supports_response_schema from crewai.agent.core import Agent - from crewai.files import FileInput, UploadCache from crewai.llms.hooks.base import BaseInterceptor from crewai.llms.providers.anthropic.completion import AnthropicThinkingConfig from crewai.task import Task @@ -2274,7 +2274,7 @@ class LLM(BaseLLM): """ import base64 - from crewai.files import ( + from crewai_files import ( FileResolver, FileResolverConfig, InlineBase64, diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py index a87bfc70c..30be5fab1 100644 --- a/lib/crewai/src/crewai/llms/base_llm.py +++ b/lib/crewai/src/crewai/llms/base_llm.py @@ -32,8 +32,9 @@ from crewai.types.usage_metrics import UsageMetrics if TYPE_CHECKING: + from crewai_files import FileInput, UploadCache + from crewai.agent.core import Agent - from crewai.files import FileInput, UploadCache from crewai.task import Task from crewai.tools.base_tool import BaseTool from crewai.utilities.types import LLMMessage diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 1a62dca0e..6b0bb82cc 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -20,7 +20,8 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai.files import FileInput, UploadCache + from crewai_files import FileInput, UploadCache + from crewai.llms.hooks.base import BaseInterceptor DEFAULT_CACHE_TTL = "ephemeral" @@ -1281,7 +1282,7 @@ class AnthropicCompletion(BaseLLM): if not self.supports_multimodal(): return [] - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, @@ -1394,7 +1395,7 @@ class AnthropicCompletion(BaseLLM): if not self.supports_multimodal(): return [] - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index e14e0b42c..52dfee3eb 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -18,7 +18,8 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai.files import FileInput, UploadCache + from crewai_files import FileInput, UploadCache + from crewai.llms.hooks.base import BaseInterceptor @@ -1060,7 +1061,7 @@ class AzureCompletion(BaseLLM): if not self.supports_multimodal(): return [] - from crewai.files import ( + from crewai_files import ( FileResolver, FileResolverConfig, InlineBase64, @@ -1120,7 +1121,7 @@ class AzureCompletion(BaseLLM): if not self.supports_multimodal(): return [] - from crewai.files import ( + from crewai_files import ( FileResolver, FileResolverConfig, InlineBase64, diff --git a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py index f62652efd..22ce0ed13 100644 --- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py +++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py @@ -20,6 +20,7 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: + from crewai_files import FileInput, UploadCache from mypy_boto3_bedrock_runtime.type_defs import ( GuardrailConfigurationTypeDef, GuardrailStreamConfigurationTypeDef, @@ -32,7 +33,6 @@ if TYPE_CHECKING: ToolTypeDef, ) - from crewai.files import FileInput, UploadCache from crewai.llms.hooks.base import BaseInterceptor @@ -1586,7 +1586,7 @@ class BedrockCompletion(BaseLLM): import os - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, @@ -1714,7 +1714,7 @@ class BedrockCompletion(BaseLLM): import os - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index dc7feb917..97ef57315 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -19,10 +19,11 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: - from crewai.files import ( + from crewai_files import ( FileInput, UploadCache, ) + from crewai.llms.hooks.base import BaseInterceptor @@ -1113,7 +1114,7 @@ class GeminiCompletion(BaseLLM): Returns: List of content blocks in Gemini's expected format. """ - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, @@ -1183,7 +1184,7 @@ class GeminiCompletion(BaseLLM): Returns: List of content blocks in Gemini's expected format. """ - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py index 53e8c6d6d..8b9e9a91a 100644 --- a/lib/crewai/src/crewai/llms/providers/openai/completion.py +++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py @@ -27,8 +27,9 @@ from crewai.utilities.types import LLMMessage if TYPE_CHECKING: + from crewai_files import FileInput, UploadCache + from crewai.agent.core import Agent - from crewai.files import FileInput, UploadCache from crewai.llms.hooks.base import BaseInterceptor from crewai.task import Task from crewai.tools.base_tool import BaseTool @@ -1100,7 +1101,7 @@ class OpenAICompletion(BaseLLM): if not self.supports_multimodal(): return [] - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, @@ -1168,7 +1169,7 @@ class OpenAICompletion(BaseLLM): if not self.supports_multimodal(): return [] - from crewai.files import ( + from crewai_files import ( FileReference, FileResolver, FileResolverConfig, diff --git a/lib/crewai/src/crewai/task.py b/lib/crewai/src/crewai/task.py index 8c4b26608..e6ea561a3 100644 --- a/lib/crewai/src/crewai/task.py +++ b/lib/crewai/src/crewai/task.py @@ -19,6 +19,12 @@ from typing import ( import uuid import warnings +from crewai_files import ( + FileInput, + FilePath, + FileSourceInput, + normalize_input_files, +) from pydantic import ( UUID4, BaseModel, @@ -37,12 +43,6 @@ from crewai.events.types.task_events import ( TaskFailedEvent, TaskStartedEvent, ) -from crewai.files import ( - FileInput, - FilePath, - FileSourceInput, - normalize_input_files, -) from crewai.security import Fingerprint, SecurityConfig from crewai.tasks.output_format import OutputFormat from crewai.tasks.task_output import TaskOutput diff --git a/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py b/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py index 43d47529a..e41d5390d 100644 --- a/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py +++ b/lib/crewai/src/crewai/tools/agent_tools/read_file_tool.py @@ -11,7 +11,7 @@ from crewai.tools.base_tool import BaseTool if TYPE_CHECKING: - from crewai.files import FileInput + from crewai_files import FileInput class ReadFileToolSchema(BaseModel): diff --git a/lib/crewai/src/crewai/utilities/file_store.py b/lib/crewai/src/crewai/utilities/file_store.py index dedb46079..837cc5cbc 100644 --- a/lib/crewai/src/crewai/utilities/file_store.py +++ b/lib/crewai/src/crewai/utilities/file_store.py @@ -13,7 +13,7 @@ from aiocache.serializers import PickleSerializer # type: ignore[import-untyped if TYPE_CHECKING: - from crewai.files import FileInput + from crewai_files import FileInput _file_store = Cache(Cache.MEMORY, serializer=PickleSerializer()) diff --git a/lib/crewai/src/crewai/utilities/files/__init__.py b/lib/crewai/src/crewai/utilities/files/__init__.py deleted file mode 100644 index 8e7bb3972..000000000 --- a/lib/crewai/src/crewai/utilities/files/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Backwards compatibility re-exports from crewai.files. - -Deprecated: Import from crewai.files instead. -""" - -import sys -from typing import Any - -from typing_extensions import deprecated - -import crewai.files as _files - - -@deprecated("crewai.utilities.files is deprecated. Import from crewai.files instead.") -class _DeprecatedModule: - """Deprecated module wrapper.""" - - def __getattr__(self, name: str) -> Any: - return getattr(_files, name) - - def __dir__(self) -> list[str]: - return list(_files.__all__) - - -sys.modules[__name__] = _DeprecatedModule() # type: ignore[assignment] diff --git a/lib/crewai/src/crewai/utilities/files/__init__.pyi b/lib/crewai/src/crewai/utilities/files/__init__.pyi deleted file mode 100644 index 872245260..000000000 --- a/lib/crewai/src/crewai/utilities/files/__init__.pyi +++ /dev/null @@ -1,258 +0,0 @@ -"""Type stubs for backwards compatibility re-exports from crewai.files. - -.. deprecated:: - Import from crewai.files instead. -""" - -from collections.abc import Callable -from datetime import datetime -from pathlib import Path -from typing import Any, Literal - -from typing_extensions import deprecated - -import crewai.files as _files - -FileMode = Literal["strict", "auto", "warn", "chunk"] -ImageExtension = _files.ImageExtension -ImageContentType = _files.ImageContentType -PDFExtension = _files.PDFExtension -PDFContentType = _files.PDFContentType -TextExtension = _files.TextExtension -TextContentType = _files.TextContentType -AudioExtension = _files.AudioExtension -AudioContentType = _files.AudioContentType -VideoExtension = _files.VideoExtension -VideoContentType = _files.VideoContentType -FileInput = _files.FileInput -FileSource = _files.FileSource -FileSourceInput = _files.FileSourceInput -RawFileInput = _files.RawFileInput -ResolvedFileType = _files.ResolvedFileType -FileHandling = _files.FileHandling - -# Deprecated classes -@deprecated("Import from crewai.files instead") -class BaseFile(_files.BaseFile): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class ImageFile(_files.ImageFile): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class PDFFile(_files.PDFFile): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class TextFile(_files.TextFile): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class AudioFile(_files.AudioFile): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class VideoFile(_files.VideoFile): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class File(_files.File): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FilePath(_files.FilePath): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileBytes(_files.FileBytes): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileStream(_files.FileStream): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileResolver(_files.FileResolver): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileResolverConfig(_files.FileResolverConfig): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileProcessor(_files.FileProcessor): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileUploader(_files.FileUploader): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class UploadCache(_files.UploadCache): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class CachedUpload(_files.CachedUpload): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class UploadResult(_files.UploadResult): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class ResolvedFile(_files.ResolvedFile): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileReference(_files.FileReference): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class UrlReference(_files.UrlReference): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class InlineBase64(_files.InlineBase64): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class InlineBytes(_files.InlineBytes): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class ProviderConstraints(_files.ProviderConstraints): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class ImageConstraints(_files.ImageConstraints): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class AudioConstraints(_files.AudioConstraints): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class VideoConstraints(_files.VideoConstraints): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class PDFConstraints(_files.PDFConstraints): - """.. deprecated:: Import from crewai.files instead.""" - ... - -# Exceptions -@deprecated("Import from crewai.files instead") -class FileProcessingError(_files.FileProcessingError): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileValidationError(_files.FileValidationError): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class FileTooLargeError(_files.FileTooLargeError): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class UnsupportedFileTypeError(_files.UnsupportedFileTypeError): - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -class ProcessingDependencyError(_files.ProcessingDependencyError): - """.. deprecated:: Import from crewai.files instead.""" - ... - -# Constants -OPENAI_CONSTRAINTS: _files.ProviderConstraints -ANTHROPIC_CONSTRAINTS: _files.ProviderConstraints -GEMINI_CONSTRAINTS: _files.ProviderConstraints -BEDROCK_CONSTRAINTS: _files.ProviderConstraints - -# Deprecated functions -@deprecated("Import from crewai.files instead") -def create_resolver( - provider: str, - config: FileResolverConfig | None = None, -) -> FileResolver: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def get_uploader(provider: str, **kwargs: Any) -> FileUploader | None: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def get_upload_cache() -> UploadCache: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def reset_upload_cache() -> None: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def get_constraints_for_provider(provider: str) -> ProviderConstraints: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def cleanup_uploaded_files(provider: str | None = None) -> int: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def cleanup_expired_files() -> int: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def cleanup_provider_files(provider: str) -> int: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def normalize_input_files( - input_files: list[FileSourceInput | FileInput], -) -> dict[str, FileInput]: - """.. deprecated:: Import from crewai.files instead.""" - ... - -@deprecated("Import from crewai.files instead") -def wrap_file_source(source: FileSource) -> FileInput: - """.. deprecated:: Import from crewai.files instead.""" - ... - -__all__: list[str] \ No newline at end of file diff --git a/lib/crewai/src/crewai/utilities/types.py b/lib/crewai/src/crewai/utilities/types.py index 9f616c1c8..95699bc63 100644 --- a/lib/crewai/src/crewai/utilities/types.py +++ b/lib/crewai/src/crewai/utilities/types.py @@ -2,7 +2,7 @@ from typing import Any, Literal, TypedDict -from crewai.files import FileInput +from crewai_files import FileInput class LLMMessage(TypedDict): diff --git a/lib/crewai/tests/files/__init__.py b/lib/crewai/tests/files/__init__.py deleted file mode 100644 index cfe6c032e..000000000 --- a/lib/crewai/tests/files/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for file processing utilities.""" diff --git a/lib/crewai/tests/files/processing/__init__.py b/lib/crewai/tests/files/processing/__init__.py deleted file mode 100644 index 0ef74ec12..000000000 --- a/lib/crewai/tests/files/processing/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for file processing module.""" diff --git a/pyproject.toml b/pyproject.toml index 975e05b80..df0a62288 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,7 @@ ignore-decorators = ["typing.overload"] [tool.ruff.lint.per-file-ignores] "lib/crewai/tests/**/*.py" = ["S101", "RET504", "S105", "S106"] # Allow assert statements, unnecessary assignments, and hardcoded passwords in tests "lib/crewai-tools/tests/**/*.py" = ["S101", "RET504", "S105", "S106", "RUF012", "N818", "E402", "RUF043", "S110", "B017"] # Allow various test-specific patterns +"lib/crewai-files/tests/**/*.py" = ["S101", "RET504", "S105", "S106", "B017", "F841"] # Allow assert statements and blind exception assertions in tests [tool.mypy] @@ -117,7 +118,7 @@ warn_return_any = true show_error_codes = true warn_unused_ignores = true python_version = "3.12" -exclude = "(?x)(^lib/crewai/src/crewai/cli/templates/ | ^lib/crewai/tests/ | ^lib/crewai-tools/tests/)" +exclude = "(?x)(^lib/crewai/src/crewai/cli/templates/|^lib/crewai/tests/|^lib/crewai-tools/tests/|^lib/crewai-files/tests/)" plugins = ["pydantic.mypy"] @@ -132,6 +133,7 @@ markers = [ testpaths = [ "lib/crewai/tests", "lib/crewai-tools/tests", + "lib/crewai-files/tests", ] asyncio_mode = "strict" asyncio_default_fixture_loop_scope = "function" @@ -146,6 +148,7 @@ members = [ "lib/crewai", "lib/crewai-tools", "lib/devtools", + "lib/crewai-files", ] @@ -153,3 +156,4 @@ members = [ crewai = { workspace = true } crewai-tools = { workspace = true } crewai-devtools = { workspace = true } +crewai-files = { workspace = true } diff --git a/uv.lock b/uv.lock index d94c4bda5..3d0335669 100644 --- a/uv.lock +++ b/uv.lock @@ -32,6 +32,7 @@ resolution-markers = [ members = [ "crewai", "crewai-devtools", + "crewai-files", "crewai-tools", ] @@ -1239,13 +1240,7 @@ embeddings = [ { name = "tiktoken" }, ] file-processing = [ - { name = "aiocache" }, - { name = "aiofiles" }, - { name = "av" }, - { name = "pillow" }, - { name = "pypdf" }, - { name = "python-magic" }, - { name = "tinytag" }, + { name = "crewai-files" }, ] google-genai = [ { name = "google-genai" }, @@ -1279,18 +1274,16 @@ watson = [ requires-dist = [ { name = "a2a-sdk", marker = "extra == 'a2a'", specifier = "~=0.3.10" }, { name = "aiobotocore", marker = "extra == 'aws'", specifier = "~=2.25.2" }, - { name = "aiocache", marker = "extra == 'file-processing'", specifier = "~=0.12.3" }, { name = "aiocache", extras = ["memcached", "redis"], marker = "extra == 'a2a'", specifier = "~=0.12.3" }, - { name = "aiofiles", marker = "extra == 'file-processing'", specifier = "~=24.1.0" }, { name = "aiosqlite", specifier = "~=0.21.0" }, { name = "anthropic", marker = "extra == 'anthropic'", specifier = "~=0.71.0" }, { name = "appdirs", specifier = "~=1.4.4" }, - { name = "av", marker = "extra == 'file-processing'", specifier = "~=13.0.0" }, { name = "azure-ai-inference", marker = "extra == 'azure-ai-inference'", specifier = "~=1.0.0b9" }, { name = "boto3", marker = "extra == 'aws'", specifier = "~=1.40.38" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = "~=1.40.45" }, { name = "chromadb", specifier = "~=1.1.0" }, { name = "click", specifier = "~=8.1.7" }, + { name = "crewai-files", marker = "extra == 'file-processing'", editable = "lib/crewai-files" }, { name = "crewai-tools", marker = "extra == 'tools'", editable = "lib/crewai-tools" }, { name = "docling", marker = "extra == 'docling'", specifier = "~=2.63.0" }, { name = "google-genai", marker = "extra == 'google-genai'", specifier = "~=1.49.0" }, @@ -1312,18 +1305,14 @@ requires-dist = [ { name = "opentelemetry-sdk", specifier = "~=1.34.0" }, { name = "pandas", marker = "extra == 'pandas'", specifier = "~=2.2.3" }, { name = "pdfplumber", specifier = "~=0.11.4" }, - { name = "pillow", marker = "extra == 'file-processing'", specifier = "~=10.4.0" }, { name = "portalocker", specifier = "~=2.7.0" }, { name = "pydantic", specifier = "~=2.11.9" }, { name = "pydantic-settings", specifier = "~=2.10.1" }, { name = "pyjwt", specifier = "~=2.9.0" }, - { name = "pypdf", marker = "extra == 'file-processing'", specifier = "~=4.0.0" }, { name = "python-dotenv", specifier = "~=1.1.1" }, - { name = "python-magic", marker = "extra == 'file-processing'", specifier = ">=0.4.27" }, { name = "qdrant-client", extras = ["fastembed"], marker = "extra == 'qdrant'", specifier = "~=1.14.3" }, { name = "regex", specifier = "~=2024.9.11" }, { name = "tiktoken", marker = "extra == 'embeddings'", specifier = "~=0.8.0" }, - { name = "tinytag", marker = "extra == 'file-processing'", specifier = "~=1.10.0" }, { name = "tokenizers", specifier = "~=0.20.3" }, { name = "tomli", specifier = "~=2.0.2" }, { name = "tomli-w", specifier = "~=1.1.0" }, @@ -1354,6 +1343,31 @@ requires-dist = [ { name = "toml", specifier = "~=0.10.2" }, ] +[[package]] +name = "crewai-files" +version = "0.1.0" +source = { editable = "lib/crewai-files" } +dependencies = [ + { name = "aiocache" }, + { name = "aiofiles" }, + { name = "av" }, + { name = "pillow" }, + { name = "pypdf" }, + { name = "python-magic" }, + { name = "tinytag" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiocache", specifier = "~=0.12.3" }, + { name = "aiofiles", specifier = "~=24.1.0" }, + { name = "av", specifier = "~=13.0.0" }, + { name = "pillow", specifier = "~=10.4.0" }, + { name = "pypdf", specifier = "~=4.0.0" }, + { name = "python-magic", specifier = ">=0.4.27" }, + { name = "tinytag", specifier = "~=1.10.0" }, +] + [[package]] name = "crewai-tools" source = { editable = "lib/crewai-tools" }