mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-04 08:42:38 +00:00
refactor: extract files module to standalone crewai-files package
This commit is contained in:
374
lib/crewai-files/src/crewai_files/cache/cleanup.py
vendored
Normal file
374
lib/crewai-files/src/crewai_files/cache/cleanup.py
vendored
Normal file
@@ -0,0 +1,374 @@
|
||||
"""Cleanup utilities for uploaded files."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from crewai_files.cache.upload_cache import CachedUpload, UploadCache
|
||||
from crewai_files.uploaders import get_uploader
|
||||
from crewai_files.uploaders.factory import ProviderType
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from crewai_files.uploaders.base import FileUploader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _safe_delete(
|
||||
uploader: FileUploader,
|
||||
file_id: str,
|
||||
provider: str,
|
||||
) -> bool:
|
||||
"""Safely delete a file, logging any errors.
|
||||
|
||||
Args:
|
||||
uploader: The file uploader to use.
|
||||
file_id: The file ID to delete.
|
||||
provider: Provider name for logging.
|
||||
|
||||
Returns:
|
||||
True if deleted successfully, False otherwise.
|
||||
"""
|
||||
try:
|
||||
if uploader.delete(file_id):
|
||||
logger.debug(f"Deleted {file_id} from {provider}")
|
||||
return True
|
||||
logger.warning(f"Failed to delete {file_id} from {provider}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.warning(f"Error deleting {file_id} from {provider}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def cleanup_uploaded_files(
|
||||
cache: UploadCache,
|
||||
*,
|
||||
delete_from_provider: bool = True,
|
||||
providers: list[ProviderType] | None = None,
|
||||
) -> int:
|
||||
"""Clean up uploaded files from the cache and optionally from providers.
|
||||
|
||||
Args:
|
||||
cache: The upload cache to clean up.
|
||||
delete_from_provider: If True, delete files from the provider as well.
|
||||
providers: Optional list of providers to clean up. If None, cleans all.
|
||||
|
||||
Returns:
|
||||
Number of files cleaned up.
|
||||
"""
|
||||
cleaned = 0
|
||||
|
||||
provider_uploads: dict[ProviderType, list[CachedUpload]] = {}
|
||||
|
||||
for provider in _get_providers_from_cache(cache):
|
||||
if providers is not None and provider not in providers:
|
||||
continue
|
||||
provider_uploads[provider] = cache.get_all_for_provider(provider)
|
||||
|
||||
if delete_from_provider:
|
||||
for provider, uploads in provider_uploads.items():
|
||||
uploader = get_uploader(provider)
|
||||
if uploader is None:
|
||||
logger.warning(
|
||||
f"No uploader available for {provider}, skipping cleanup"
|
||||
)
|
||||
continue
|
||||
|
||||
for upload in uploads:
|
||||
if _safe_delete(uploader, upload.file_id, provider):
|
||||
cleaned += 1
|
||||
|
||||
cache.clear()
|
||||
|
||||
logger.info(f"Cleaned up {cleaned} uploaded files")
|
||||
return cleaned
|
||||
|
||||
|
||||
def cleanup_expired_files(
|
||||
cache: UploadCache,
|
||||
*,
|
||||
delete_from_provider: bool = False,
|
||||
) -> int:
|
||||
"""Clean up expired files from the cache.
|
||||
|
||||
Args:
|
||||
cache: The upload cache to clean up.
|
||||
delete_from_provider: If True, attempt to delete from provider as well.
|
||||
Note: Expired files may already be deleted by the provider.
|
||||
|
||||
Returns:
|
||||
Number of expired entries removed from cache.
|
||||
"""
|
||||
expired_entries: list[CachedUpload] = []
|
||||
|
||||
if delete_from_provider:
|
||||
for provider in _get_providers_from_cache(cache):
|
||||
expired_entries.extend(
|
||||
upload
|
||||
for upload in cache.get_all_for_provider(provider)
|
||||
if upload.is_expired()
|
||||
)
|
||||
|
||||
removed = cache.clear_expired()
|
||||
|
||||
if delete_from_provider:
|
||||
for upload in expired_entries:
|
||||
uploader = get_uploader(upload.provider)
|
||||
if uploader is not None:
|
||||
try:
|
||||
uploader.delete(upload.file_id)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not delete expired file {upload.file_id}: {e}")
|
||||
|
||||
return removed
|
||||
|
||||
|
||||
def cleanup_provider_files(
|
||||
provider: ProviderType,
|
||||
*,
|
||||
cache: UploadCache | None = None,
|
||||
delete_all_from_provider: bool = False,
|
||||
) -> int:
|
||||
"""Clean up all files for a specific provider.
|
||||
|
||||
Args:
|
||||
provider: Provider name to clean up.
|
||||
cache: Optional upload cache to clear entries from.
|
||||
delete_all_from_provider: If True, delete all files from the provider,
|
||||
not just cached ones.
|
||||
|
||||
Returns:
|
||||
Number of files deleted.
|
||||
"""
|
||||
deleted = 0
|
||||
uploader = get_uploader(provider)
|
||||
|
||||
if uploader is None:
|
||||
logger.warning(f"No uploader available for {provider}")
|
||||
return 0
|
||||
|
||||
if delete_all_from_provider:
|
||||
try:
|
||||
files = uploader.list_files()
|
||||
for file_info in files:
|
||||
file_id = file_info.get("id") or file_info.get("name")
|
||||
if file_id and uploader.delete(file_id):
|
||||
deleted += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Error listing/deleting files from {provider}: {e}")
|
||||
elif cache is not None:
|
||||
uploads = cache.get_all_for_provider(provider)
|
||||
for upload in uploads:
|
||||
if _safe_delete(uploader, upload.file_id, provider):
|
||||
deleted += 1
|
||||
cache.remove_by_file_id(upload.file_id, provider)
|
||||
|
||||
logger.info(f"Deleted {deleted} files from {provider}")
|
||||
return deleted
|
||||
|
||||
|
||||
def _get_providers_from_cache(cache: UploadCache) -> set[ProviderType]:
|
||||
"""Get unique provider names from cache entries.
|
||||
|
||||
Args:
|
||||
cache: The upload cache.
|
||||
|
||||
Returns:
|
||||
Set of provider names.
|
||||
"""
|
||||
return cache.get_providers()
|
||||
|
||||
|
||||
async def _asafe_delete(
|
||||
uploader: FileUploader,
|
||||
file_id: str,
|
||||
provider: str,
|
||||
) -> bool:
|
||||
"""Async safely delete a file, logging any errors.
|
||||
|
||||
Args:
|
||||
uploader: The file uploader to use.
|
||||
file_id: The file ID to delete.
|
||||
provider: Provider name for logging.
|
||||
|
||||
Returns:
|
||||
True if deleted successfully, False otherwise.
|
||||
"""
|
||||
try:
|
||||
if await uploader.adelete(file_id):
|
||||
logger.debug(f"Deleted {file_id} from {provider}")
|
||||
return True
|
||||
logger.warning(f"Failed to delete {file_id} from {provider}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.warning(f"Error deleting {file_id} from {provider}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def acleanup_uploaded_files(
|
||||
cache: UploadCache,
|
||||
*,
|
||||
delete_from_provider: bool = True,
|
||||
providers: list[ProviderType] | None = None,
|
||||
max_concurrency: int = 10,
|
||||
) -> int:
|
||||
"""Async clean up uploaded files from the cache and optionally from providers.
|
||||
|
||||
Args:
|
||||
cache: The upload cache to clean up.
|
||||
delete_from_provider: If True, delete files from the provider as well.
|
||||
providers: Optional list of providers to clean up. If None, cleans all.
|
||||
max_concurrency: Maximum number of concurrent delete operations.
|
||||
|
||||
Returns:
|
||||
Number of files cleaned up.
|
||||
"""
|
||||
cleaned = 0
|
||||
|
||||
provider_uploads: dict[ProviderType, list[CachedUpload]] = {}
|
||||
|
||||
for provider in _get_providers_from_cache(cache):
|
||||
if providers is not None and provider not in providers:
|
||||
continue
|
||||
provider_uploads[provider] = await cache.aget_all_for_provider(provider)
|
||||
|
||||
if delete_from_provider:
|
||||
semaphore = asyncio.Semaphore(max_concurrency)
|
||||
|
||||
async def delete_one(file_uploader: FileUploader, cached: CachedUpload) -> bool:
|
||||
"""Delete a single file with semaphore limiting."""
|
||||
async with semaphore:
|
||||
return await _asafe_delete(
|
||||
file_uploader, cached.file_id, cached.provider
|
||||
)
|
||||
|
||||
tasks: list[asyncio.Task[bool]] = []
|
||||
for provider, uploads in provider_uploads.items():
|
||||
uploader = get_uploader(provider)
|
||||
if uploader is None:
|
||||
logger.warning(
|
||||
f"No uploader available for {provider}, skipping cleanup"
|
||||
)
|
||||
continue
|
||||
|
||||
tasks.extend(
|
||||
asyncio.create_task(delete_one(uploader, cached)) for cached in uploads
|
||||
)
|
||||
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
cleaned = sum(1 for r in results if r is True)
|
||||
|
||||
await cache.aclear()
|
||||
|
||||
logger.info(f"Cleaned up {cleaned} uploaded files")
|
||||
return cleaned
|
||||
|
||||
|
||||
async def acleanup_expired_files(
|
||||
cache: UploadCache,
|
||||
*,
|
||||
delete_from_provider: bool = False,
|
||||
max_concurrency: int = 10,
|
||||
) -> int:
|
||||
"""Async clean up expired files from the cache.
|
||||
|
||||
Args:
|
||||
cache: The upload cache to clean up.
|
||||
delete_from_provider: If True, attempt to delete from provider as well.
|
||||
max_concurrency: Maximum number of concurrent delete operations.
|
||||
|
||||
Returns:
|
||||
Number of expired entries removed from cache.
|
||||
"""
|
||||
expired_entries: list[CachedUpload] = []
|
||||
|
||||
if delete_from_provider:
|
||||
for provider in _get_providers_from_cache(cache):
|
||||
uploads = await cache.aget_all_for_provider(provider)
|
||||
expired_entries.extend(upload for upload in uploads if upload.is_expired())
|
||||
|
||||
removed = await cache.aclear_expired()
|
||||
|
||||
if delete_from_provider and expired_entries:
|
||||
semaphore = asyncio.Semaphore(max_concurrency)
|
||||
|
||||
async def delete_expired(cached: CachedUpload) -> None:
|
||||
"""Delete an expired file with semaphore limiting."""
|
||||
async with semaphore:
|
||||
file_uploader = get_uploader(cached.provider)
|
||||
if file_uploader is not None:
|
||||
try:
|
||||
await file_uploader.adelete(cached.file_id)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
f"Could not delete expired file {cached.file_id}: {e}"
|
||||
)
|
||||
|
||||
await asyncio.gather(
|
||||
*[delete_expired(cached) for cached in expired_entries],
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
return removed
|
||||
|
||||
|
||||
async def acleanup_provider_files(
|
||||
provider: ProviderType,
|
||||
*,
|
||||
cache: UploadCache | None = None,
|
||||
delete_all_from_provider: bool = False,
|
||||
max_concurrency: int = 10,
|
||||
) -> int:
|
||||
"""Async clean up all files for a specific provider.
|
||||
|
||||
Args:
|
||||
provider: Provider name to clean up.
|
||||
cache: Optional upload cache to clear entries from.
|
||||
delete_all_from_provider: If True, delete all files from the provider.
|
||||
max_concurrency: Maximum number of concurrent delete operations.
|
||||
|
||||
Returns:
|
||||
Number of files deleted.
|
||||
"""
|
||||
deleted = 0
|
||||
uploader = get_uploader(provider)
|
||||
|
||||
if uploader is None:
|
||||
logger.warning(f"No uploader available for {provider}")
|
||||
return 0
|
||||
|
||||
semaphore = asyncio.Semaphore(max_concurrency)
|
||||
|
||||
async def delete_single(target_file_id: str) -> bool:
|
||||
"""Delete a single file with semaphore limiting."""
|
||||
async with semaphore:
|
||||
return await uploader.adelete(target_file_id)
|
||||
|
||||
if delete_all_from_provider:
|
||||
try:
|
||||
files = uploader.list_files()
|
||||
tasks = []
|
||||
for file_info in files:
|
||||
fid = file_info.get("id") or file_info.get("name")
|
||||
if fid:
|
||||
tasks.append(delete_single(fid))
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
deleted = sum(1 for r in results if r is True)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error listing/deleting files from {provider}: {e}")
|
||||
elif cache is not None:
|
||||
uploads = await cache.aget_all_for_provider(provider)
|
||||
tasks = []
|
||||
for upload in uploads:
|
||||
tasks.append(delete_single(upload.file_id))
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
for upload, result in zip(uploads, results, strict=False):
|
||||
if result is True:
|
||||
deleted += 1
|
||||
await cache.aremove_by_file_id(upload.file_id, provider)
|
||||
|
||||
logger.info(f"Deleted {deleted} files from {provider}")
|
||||
return deleted
|
||||
Reference in New Issue
Block a user