mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-24 07:38:14 +00:00
375 lines
12 KiB
Python
375 lines
12 KiB
Python
"""Cleanup utilities for uploaded files."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import TYPE_CHECKING
|
|
|
|
from crewai_files.cache.upload_cache import CachedUpload, UploadCache
|
|
from crewai_files.uploaders import get_uploader
|
|
from crewai_files.uploaders.factory import ProviderType
|
|
|
|
|
|
if TYPE_CHECKING:
|
|
from crewai_files.uploaders.base import FileUploader
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _safe_delete(
    uploader: FileUploader,
    file_id: str,
    provider: str,
) -> bool:
    """Delete one file through ``uploader`` without ever raising ``Exception``.

    Both a falsy result from ``uploader.delete`` and an exception raised by it
    are logged as warnings and reported to the caller as a failure.

    Args:
        uploader: Uploader whose ``delete`` method performs the removal.
        file_id: Identifier of the file to remove.
        provider: Provider name; only used in the log messages.

    Returns:
        True when the uploader reported a successful deletion, False when it
        reported failure or raised.
    """
    try:
        was_deleted = bool(uploader.delete(file_id))
        if was_deleted:
            logger.debug(f"Deleted {file_id} from {provider}")
        else:
            logger.warning(f"Failed to delete {file_id} from {provider}")
        return was_deleted
    except Exception as e:
        logger.warning(f"Error deleting {file_id} from {provider}: {e}")
        return False
|
|
|
|
|
|
def cleanup_uploaded_files(
    cache: UploadCache,
    *,
    delete_from_provider: bool = True,
    providers: list[ProviderType] | None = None,
) -> int:
    """Clean up uploaded files from the cache and optionally from providers.

    Args:
        cache: The upload cache to clean up.
        delete_from_provider: If True, delete files from the provider as well.
        providers: Optional list of providers to clean up. If None, cleans all.
            When given, only cache entries belonging to these providers are
            dropped; entries of other providers are left in the cache.

    Returns:
        Number of files successfully deleted from their provider. This is 0
        when ``delete_from_provider`` is False.
    """
    # Snapshot the entries of every selected provider before touching anything.
    selected: dict[ProviderType, list[CachedUpload]] = {
        provider: cache.get_all_for_provider(provider)
        for provider in _get_providers_from_cache(cache)
        if providers is None or provider in providers
    }

    cleaned = 0
    if delete_from_provider:
        for provider, uploads in selected.items():
            uploader = get_uploader(provider)
            if uploader is None:
                logger.warning(
                    f"No uploader available for {provider}, skipping cleanup"
                )
                continue

            for upload in uploads:
                if _safe_delete(uploader, upload.file_id, provider):
                    cleaned += 1

    if providers is None:
        cache.clear()
    else:
        # A filtered cleanup must not forget uploads of providers that were
        # not asked for; clearing the whole cache would orphan their files.
        for provider, uploads in selected.items():
            for upload in uploads:
                cache.remove_by_file_id(upload.file_id, provider)

    logger.info(f"Cleaned up {cleaned} uploaded files")
    return cleaned
|
|
|
|
|
|
def cleanup_expired_files(
    cache: UploadCache,
    *,
    delete_from_provider: bool = False,
) -> int:
    """Drop expired entries from the cache, optionally deleting them remotely.

    Args:
        cache: The upload cache to prune.
        delete_from_provider: If True, also try to delete each expired file
            from its provider. Failures are only logged at debug level, since
            an expired file may already have been removed by the provider.

    Returns:
        The value returned by ``cache.clear_expired()``, i.e. the number of
        expired entries removed from the cache.
    """
    if not delete_from_provider:
        return cache.clear_expired()

    # Collect the expired entries first: once the cache is pruned they are gone.
    stale = [
        entry
        for provider in _get_providers_from_cache(cache)
        for entry in cache.get_all_for_provider(provider)
        if entry.is_expired()
    ]

    removed_count = cache.clear_expired()

    for upload in stale:
        uploader = get_uploader(upload.provider)
        if uploader is None:
            continue
        try:
            uploader.delete(upload.file_id)
        except Exception as e:
            logger.debug(f"Could not delete expired file {upload.file_id}: {e}")

    return removed_count
|
|
|
|
|
|
def cleanup_provider_files(
    provider: ProviderType,
    *,
    cache: UploadCache | None = None,
    delete_all_from_provider: bool = False,
) -> int:
    """Clean up all files for a specific provider.

    Args:
        provider: Provider name to clean up.
        cache: Optional upload cache. Only consulted when
            ``delete_all_from_provider`` is False; entries whose file was
            deleted successfully are removed from it.
        delete_all_from_provider: If True, delete every file returned by the
            uploader's ``list_files()``, not just cached ones.

    Returns:
        Number of files deleted. 0 when no uploader is available for
        ``provider``.
    """
    uploader = get_uploader(provider)
    if uploader is None:
        logger.warning(f"No uploader available for {provider}")
        return 0

    deleted = 0

    if delete_all_from_provider:
        try:
            # Materialise inside the try so a lazy listing fails here as well.
            files = list(uploader.list_files())
        except Exception as e:
            logger.warning(f"Error listing/deleting files from {provider}: {e}")
            files = []

        for file_info in files:
            # Guard each file on its own: one failing delete must not abort
            # the deletion of every remaining file.
            try:
                file_id = file_info.get("id") or file_info.get("name")
                if file_id and uploader.delete(file_id):
                    deleted += 1
            except Exception as e:
                logger.warning(
                    f"Error listing/deleting files from {provider}: {e}"
                )
    elif cache is not None:
        for upload in cache.get_all_for_provider(provider):
            if _safe_delete(uploader, upload.file_id, provider):
                deleted += 1
                # Forget the entry only after the provider confirmed deletion.
                cache.remove_by_file_id(upload.file_id, provider)

    logger.info(f"Deleted {deleted} files from {provider}")
    return deleted
|
|
|
|
|
|
def _get_providers_from_cache(cache: UploadCache) -> set[ProviderType]:
    """Return the providers reported by the cache.

    Thin indirection over ``cache.get_providers()`` so the cleanup functions
    share a single lookup point.

    Args:
        cache: The upload cache to query.

    Returns:
        Whatever ``cache.get_providers()`` returns (annotated as a set of
        provider names).
    """
    known_providers = cache.get_providers()
    return known_providers
|
|
|
|
|
|
async def _asafe_delete(
    uploader: FileUploader,
    file_id: str,
    provider: str,
) -> bool:
    """Async counterpart of the safe delete: never raises ``Exception``.

    Awaits ``uploader.adelete`` and logs a warning both when it returns a
    falsy result and when it raises.

    Args:
        uploader: Uploader whose ``adelete`` coroutine performs the removal.
        file_id: Identifier of the file to remove.
        provider: Provider name; only used in the log messages.

    Returns:
        True when the uploader reported a successful deletion, False when it
        reported failure or raised.
    """
    try:
        was_deleted = bool(await uploader.adelete(file_id))
        if was_deleted:
            logger.debug(f"Deleted {file_id} from {provider}")
        else:
            logger.warning(f"Failed to delete {file_id} from {provider}")
        return was_deleted
    except Exception as e:
        logger.warning(f"Error deleting {file_id} from {provider}: {e}")
        return False
|
|
|
|
|
|
async def acleanup_uploaded_files(
    cache: UploadCache,
    *,
    delete_from_provider: bool = True,
    providers: list[ProviderType] | None = None,
    max_concurrency: int = 10,
) -> int:
    """Async clean up uploaded files from the cache and optionally from providers.

    Args:
        cache: The upload cache to clean up.
        delete_from_provider: If True, delete files from the provider as well.
        providers: Optional list of providers to clean up. If None, cleans all.
            When given, only cache entries belonging to these providers are
            dropped; entries of other providers are left in the cache.
        max_concurrency: Maximum number of concurrent delete operations.

    Returns:
        Number of files successfully deleted from their provider. This is 0
        when ``delete_from_provider`` is False.
    """
    # Snapshot the entries of every selected provider before touching anything.
    selected: dict[ProviderType, list[CachedUpload]] = {}
    for provider in _get_providers_from_cache(cache):
        if providers is None or provider in providers:
            selected[provider] = await cache.aget_all_for_provider(provider)

    cleaned = 0
    if delete_from_provider:
        semaphore = asyncio.Semaphore(max_concurrency)

        async def delete_one(file_uploader: FileUploader, cached: CachedUpload) -> bool:
            """Delete a single file with semaphore limiting."""
            async with semaphore:
                return await _asafe_delete(
                    file_uploader, cached.file_id, cached.provider
                )

        tasks: list[asyncio.Task[bool]] = []
        for provider, uploads in selected.items():
            uploader = get_uploader(provider)
            if uploader is None:
                logger.warning(
                    f"No uploader available for {provider}, skipping cleanup"
                )
                continue

            tasks.extend(
                asyncio.create_task(delete_one(uploader, cached)) for cached in uploads
            )

        # return_exceptions=True keeps one failed task from cancelling the rest;
        # only results that are exactly True count as successful deletions.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        cleaned = sum(1 for r in results if r is True)

    if providers is None:
        await cache.aclear()
    else:
        # A filtered cleanup must not forget uploads of providers that were
        # not asked for; clearing the whole cache would orphan their files.
        for provider, uploads in selected.items():
            for upload in uploads:
                await cache.aremove_by_file_id(upload.file_id, provider)

    logger.info(f"Cleaned up {cleaned} uploaded files")
    return cleaned
|
|
|
|
|
|
async def acleanup_expired_files(
    cache: UploadCache,
    *,
    delete_from_provider: bool = False,
    max_concurrency: int = 10,
) -> int:
    """Async drop expired entries from the cache, optionally deleting remotely.

    Args:
        cache: The upload cache to prune.
        delete_from_provider: If True, also try to delete each expired file
            from its provider. Failures are only logged at debug level.
        max_concurrency: Maximum number of concurrent delete operations.

    Returns:
        The value returned by ``cache.aclear_expired()``, i.e. the number of
        expired entries removed from the cache.
    """
    if not delete_from_provider:
        return await cache.aclear_expired()

    # Collect the expired entries first: once the cache is pruned they are gone.
    stale: list[CachedUpload] = []
    for provider in _get_providers_from_cache(cache):
        candidates = await cache.aget_all_for_provider(provider)
        stale += [entry for entry in candidates if entry.is_expired()]

    removed_count = await cache.aclear_expired()
    if not stale:
        return removed_count

    limiter = asyncio.Semaphore(max_concurrency)

    async def delete_expired(cached: CachedUpload) -> None:
        """Delete an expired file with semaphore limiting."""
        async with limiter:
            file_uploader = get_uploader(cached.provider)
            if file_uploader is None:
                return
            try:
                await file_uploader.adelete(cached.file_id)
            except Exception as e:
                logger.debug(
                    f"Could not delete expired file {cached.file_id}: {e}"
                )

    pending = [delete_expired(cached) for cached in stale]
    await asyncio.gather(*pending, return_exceptions=True)

    return removed_count
|
|
|
|
|
|
async def acleanup_provider_files(
    provider: ProviderType,
    *,
    cache: UploadCache | None = None,
    delete_all_from_provider: bool = False,
    max_concurrency: int = 10,
) -> int:
    """Async clean up all files for a specific provider.

    Args:
        provider: Provider name to clean up.
        cache: Optional upload cache. Only consulted when
            ``delete_all_from_provider`` is False; entries whose file was
            deleted successfully are removed from it.
        delete_all_from_provider: If True, delete every file returned by the
            uploader's ``list_files()``, not just cached ones.
        max_concurrency: Maximum number of concurrent delete operations.

    Returns:
        Number of files deleted. 0 when no uploader is available for
        ``provider``.
    """
    uploader = get_uploader(provider)
    if uploader is None:
        logger.warning(f"No uploader available for {provider}")
        return 0

    deleted = 0
    semaphore = asyncio.Semaphore(max_concurrency)

    async def delete_single(target_file_id: str) -> bool:
        """Delete a single file with semaphore limiting."""
        async with semaphore:
            # Route through the safe helper so failures are logged rather than
            # silently dropped by gather(), and the result is a strict bool.
            return await _asafe_delete(uploader, target_file_id, provider)

    if delete_all_from_provider:
        file_ids: list[str] = []
        try:
            # NOTE(review): list_files() is a synchronous call inside a
            # coroutine; confirm it is cheap or move it off the event loop.
            for file_info in uploader.list_files():
                fid = file_info.get("id") or file_info.get("name")
                if fid:
                    file_ids.append(fid)
        except Exception as e:
            logger.warning(f"Error listing/deleting files from {provider}: {e}")
            # Same outcome as before: a failed listing deletes nothing.
            file_ids = []

        results = await asyncio.gather(
            *(delete_single(fid) for fid in file_ids),
            return_exceptions=True,
        )
        deleted = sum(1 for r in results if r is True)
    elif cache is not None:
        uploads = await cache.aget_all_for_provider(provider)
        results = await asyncio.gather(
            *(delete_single(upload.file_id) for upload in uploads),
            return_exceptions=True,
        )
        for upload, result in zip(uploads, results, strict=False):
            if result is True:
                deleted += 1
                # Forget the entry only after the provider confirmed deletion.
                await cache.aremove_by_file_id(upload.file_id, provider)

    logger.info(f"Deleted {deleted} files from {provider}")
    return deleted
|