chore: remove unnecessary comments and fix type errors

- Remove unnecessary block and inline comments from file utilities
- Fix mypy errors by using file.read() instead of file.source.read()
This commit is contained in:
Greyson LaLonde
2026-01-21 20:40:13 -05:00
parent e2c517d0a2
commit 4c0d99601c
12 changed files with 36 additions and 119 deletions

View File

@@ -64,7 +64,6 @@ class ReadFileTool(BaseTool):
content_type = file_input.content_type
filename = file_input.filename or file_name
# Text-based content types
text_types = (
"text/",
"application/json",
@@ -75,6 +74,5 @@ class ReadFileTool(BaseTool):
if any(content_type.startswith(t) for t in text_types):
return content.decode("utf-8")
# Binary content - return base64 encoded
encoded = base64.b64encode(content).decode("ascii")
return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}"

View File

@@ -34,18 +34,15 @@ def _run_sync(coro: Coroutine[None, None, T]) -> T:
"""
try:
asyncio.get_running_loop()
# We're in an async context - run in a thread pool
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(asyncio.run, coro)
return future.result()
except RuntimeError:
# No running loop - safe to create one
return asyncio.run(coro)
DEFAULT_TTL = 3600
# Key prefixes for different scopes
_CREW_PREFIX = "crew:"
_TASK_PREFIX = "task:"
@@ -149,7 +146,6 @@ async def aget_all_files(
if not crew_files and not task_files:
return None
# Merge with task files taking precedence
return {**crew_files, **(task_files or {})}

View File

@@ -96,7 +96,6 @@ def wrap_file_source(source: FileSource) -> FileInput:
return VideoFile(source=source)
if content_type == "application/pdf":
return PDFFile(source=source)
# Default to text for anything else
return TextFile(source=source)
@@ -116,10 +115,8 @@ def normalize_input_files(
result: dict[str, FileInput] = {}
for i, item in enumerate(input_files):
# If it's already a typed File wrapper, use it directly
if isinstance(item, BaseFile):
name = item.filename or f"file_{i}"
# Remove extension from name for cleaner keys
if "." in name:
name = name.rsplit(".", 1)[0]
result[name] = item

View File

@@ -108,45 +108,38 @@ class ProviderConstraints:
file_upload_threshold_bytes: int | None = None
# Anthropic constraints (Claude 3+)
# https://docs.anthropic.com/en/docs/build-with-claude/vision
ANTHROPIC_CONSTRAINTS = ProviderConstraints(
name="anthropic",
image=ImageConstraints(
max_size_bytes=5 * 1024 * 1024, # 5MB
max_size_bytes=5 * 1024 * 1024,
max_width=8000,
max_height=8000,
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
),
pdf=PDFConstraints(
max_size_bytes=30 * 1024 * 1024, # 30MB
max_size_bytes=30 * 1024 * 1024,
max_pages=100,
),
supports_file_upload=True,
file_upload_threshold_bytes=5 * 1024 * 1024, # Use upload for files > 5MB
file_upload_threshold_bytes=5 * 1024 * 1024,
)
# OpenAI constraints (GPT-4o, GPT-4 Vision)
# https://platform.openai.com/docs/guides/vision
OPENAI_CONSTRAINTS = ProviderConstraints(
name="openai",
image=ImageConstraints(
max_size_bytes=20 * 1024 * 1024, # 20MB
max_size_bytes=20 * 1024 * 1024,
max_images_per_request=10,
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
),
# OpenAI does not support PDFs natively
pdf=None,
supports_file_upload=True,
file_upload_threshold_bytes=5 * 1024 * 1024, # Use upload for files > 5MB
file_upload_threshold_bytes=5 * 1024 * 1024,
)
# Gemini constraints
# https://ai.google.dev/gemini-api/docs/vision
GEMINI_CONSTRAINTS = ProviderConstraints(
name="gemini",
image=ImageConstraints(
max_size_bytes=100 * 1024 * 1024, # 100MB inline
max_size_bytes=100 * 1024 * 1024,
supported_formats=(
"image/png",
"image/jpeg",
@@ -157,10 +150,10 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
),
),
pdf=PDFConstraints(
max_size_bytes=50 * 1024 * 1024, # 50MB inline
max_size_bytes=50 * 1024 * 1024,
),
audio=AudioConstraints(
max_size_bytes=100 * 1024 * 1024, # 100MB
max_size_bytes=100 * 1024 * 1024,
supported_formats=(
"audio/mp3",
"audio/mpeg",
@@ -173,7 +166,7 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
),
),
video=VideoConstraints(
max_size_bytes=2 * 1024 * 1024 * 1024, # 2GB via File API
max_size_bytes=2 * 1024 * 1024 * 1024,
supported_formats=(
"video/mp4",
"video/mpeg",
@@ -184,30 +177,27 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
),
),
supports_file_upload=True,
file_upload_threshold_bytes=20 * 1024 * 1024, # Use upload for files > 20MB
file_upload_threshold_bytes=20 * 1024 * 1024,
)
# AWS Bedrock constraints (Claude via Bedrock)
# https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
BEDROCK_CONSTRAINTS = ProviderConstraints(
name="bedrock",
image=ImageConstraints(
max_size_bytes=4_608_000, # ~4.5MB (encoded size limit)
max_size_bytes=4_608_000,
max_width=8000,
max_height=8000,
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
),
pdf=PDFConstraints(
max_size_bytes=3_840_000, # ~3.75MB
max_size_bytes=3_840_000,
max_pages=100,
),
)
# Azure OpenAI constraints (same as OpenAI)
AZURE_CONSTRAINTS = ProviderConstraints(
name="azure",
image=ImageConstraints(
max_size_bytes=20 * 1024 * 1024, # 20MB
max_size_bytes=20 * 1024 * 1024,
max_images_per_request=10,
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
),
@@ -215,14 +205,12 @@ AZURE_CONSTRAINTS = ProviderConstraints(
)
# Provider name mapping for convenience
_PROVIDER_CONSTRAINTS_MAP: dict[str, ProviderConstraints] = {
"anthropic": ANTHROPIC_CONSTRAINTS,
"openai": OPENAI_CONSTRAINTS,
"gemini": GEMINI_CONSTRAINTS,
"bedrock": BEDROCK_CONSTRAINTS,
"azure": AZURE_CONSTRAINTS,
# Aliases
"claude": ANTHROPIC_CONSTRAINTS,
"gpt": OPENAI_CONSTRAINTS,
"google": GEMINI_CONSTRAINTS,
@@ -246,11 +234,9 @@ def get_constraints_for_provider(
provider_lower = provider.lower()
# Direct lookup
if provider_lower in _PROVIDER_CONSTRAINTS_MAP:
return _PROVIDER_CONSTRAINTS_MAP[provider_lower]
# Check if provider name contains any known provider
for key, constraints in _PROVIDER_CONSTRAINTS_MAP.items():
if key in provider_lower:
return constraints

View File

@@ -125,15 +125,12 @@ class FileProcessor:
mode = self._get_mode(file)
try:
# First validate
errors = self.validate(file)
if not errors:
return file
# Handle based on mode
if mode == FileHandling.STRICT:
# Errors should have already raised in validate()
raise FileValidationError("; ".join(errors), file_name=file.filename)
if mode == FileHandling.WARN:
@@ -178,7 +175,6 @@ class FileProcessor:
if isinstance(processed, Sequence) and not isinstance(
processed, (str, bytes)
):
# File was chunked - add each chunk with indexed name
for i, chunk in enumerate(processed):
chunk_name = f"{name}_chunk_{i}"
result[chunk_name] = chunk
@@ -203,15 +199,12 @@ class FileProcessor:
return self._auto_process_image(file)
if isinstance(file, PDFFile) and self.constraints.pdf is not None:
# PDFs can't easily be auto-compressed, log warning
logger.warning(
f"Cannot auto-compress PDF '{file.filename}'. "
"Consider using CHUNK mode for large PDFs."
)
return file
# Audio and video auto-processing would require additional dependencies
# For now, just warn
if isinstance(file, (AudioFile, VideoFile)):
logger.warning(
f"Auto-processing not supported for {type(file).__name__}. "
@@ -235,10 +228,9 @@ class FileProcessor:
image_constraints = self.constraints.image
processed = file
content = file.source.read()
content = file.read()
current_size = len(content)
# First, resize if dimensions exceed limits
if image_constraints.max_width or image_constraints.max_height:
dimensions = get_image_dimensions(file)
if dimensions:
@@ -249,12 +241,11 @@ class FileProcessor:
if width > max_w or height > max_h:
try:
processed = resize_image(file, max_w, max_h)
content = processed.source.read()
content = processed.read()
current_size = len(content)
except Exception as e:
logger.warning(f"Failed to resize image: {e}")
# Then, optimize if size still exceeds limits
if current_size > image_constraints.max_size_bytes:
try:
processed = optimize_image(processed, image_constraints.max_size_bytes)
@@ -290,7 +281,7 @@ class FileProcessor:
# Use general max size as character limit approximation
max_size = self.constraints.general_max_size_bytes
if max_size is not None:
content = file.source.read()
content = file.read()
if len(content) > max_size:
try:
return chunk_text(file, max_size)
@@ -298,7 +289,6 @@ class FileProcessor:
logger.warning(f"Failed to chunk text file: {e}")
return file
# For other file types, chunking is not supported
if isinstance(file, (ImageFile, AudioFile, VideoFile)):
logger.warning(
f"Chunking not supported for {type(file).__name__}. "

View File

@@ -42,17 +42,15 @@ def resize_image(
install_command="pip install Pillow",
) from e
content = file.source.read()
content = file.read()
with Image.open(io.BytesIO(content)) as img:
original_width, original_height = img.size
# Check if resize is needed
if original_width <= max_width and original_height <= max_height:
return file
if preserve_aspect_ratio:
# Calculate scaling factor to fit within bounds
width_ratio = max_width / original_width
height_ratio = max_height / original_height
scale_factor = min(width_ratio, height_ratio)
@@ -63,17 +61,13 @@ def resize_image(
new_width = min(original_width, max_width)
new_height = min(original_height, max_height)
# Resize the image
resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
# Determine output format
output_format = img.format or "PNG"
if output_format.upper() == "JPEG":
# Handle RGBA images for JPEG
if resized_img.mode in ("RGBA", "LA", "P"):
resized_img = resized_img.convert("RGB")
# Save to bytes
output_buffer = io.BytesIO()
resized_img.save(output_buffer, format=output_format)
output_bytes = output_buffer.getvalue()
@@ -118,15 +112,13 @@ def optimize_image(
install_command="pip install Pillow",
) from e
content = file.source.read()
content = file.read()
current_size = len(content)
# If already within target, return as-is
if current_size <= target_size_bytes:
return file
with Image.open(io.BytesIO(content)) as img:
# Convert to RGB for JPEG compression if needed
if img.mode in ("RGBA", "LA", "P"):
img = img.convert("RGB")
output_format = "JPEG"
@@ -138,7 +130,6 @@ def optimize_image(
quality = initial_quality
output_bytes = content
# Binary search for optimal quality
while len(output_bytes) > target_size_bytes and quality >= min_quality:
output_buffer = io.BytesIO()
img.save(
@@ -193,11 +184,10 @@ def chunk_pdf(
install_command="pip install pypdf",
) from e
content = file.source.read()
content = file.read()
reader = PdfReader(io.BytesIO(content))
total_pages = len(reader.pages)
# If within limit, return as-is
if total_pages <= max_pages:
return [file]
@@ -253,11 +243,10 @@ def chunk_text(
Returns:
List of TextFile objects, one per chunk.
"""
content = file.source.read()
content = file.read()
text = content.decode("utf-8", errors="replace")
total_chars = len(text)
# If within limit, return as-is
if total_chars <= max_chars:
return [file]
@@ -291,7 +280,6 @@ def chunk_text(
f"Created text chunk '{chunk_filename}' with {len(chunk_text)} characters"
)
# Move start position with overlap
start_pos = end_pos - overlap_chars if end_pos < total_chars else total_chars
chunk_num += 1
@@ -313,7 +301,7 @@ def get_image_dimensions(file: ImageFile) -> tuple[int, int] | None:
logger.warning("Pillow not installed - cannot get image dimensions")
return None
content = file.source.read()
content = file.read()
try:
with Image.open(io.BytesIO(content)) as img:
@@ -339,7 +327,7 @@ def get_pdf_page_count(file: PDFFile) -> int | None:
logger.warning("pypdf not installed - cannot get PDF page count")
return None
content = file.source.read()
content = file.read()
try:
reader = PdfReader(io.BytesIO(content))

View File

@@ -63,11 +63,10 @@ def validate_image(
UnsupportedFileTypeError: If the format is not supported.
"""
errors: list[str] = []
content = file.source.read()
content = file.read()
file_size = len(content)
filename = file.filename
# Check file size
if file_size > constraints.max_size_bytes:
msg = (
f"Image '{filename}' size ({_format_size(file_size)}) exceeds "
@@ -82,7 +81,6 @@ def validate_image(
max_size=constraints.max_size_bytes,
)
# Check format
content_type = file.content_type
if content_type not in constraints.supported_formats:
msg = (
@@ -95,7 +93,6 @@ def validate_image(
msg, file_name=filename, content_type=content_type
)
# Check dimensions if constraints specify them
if constraints.max_width is not None or constraints.max_height is not None:
try:
import io
@@ -153,11 +150,10 @@ def validate_pdf(
FileValidationError: If the file exceeds page limits.
"""
errors: list[str] = []
content = file.source.read()
content = file.read()
file_size = len(content)
filename = file.filename
# Check file size
if file_size > constraints.max_size_bytes:
msg = (
f"PDF '{filename}' size ({_format_size(file_size)}) exceeds "
@@ -172,7 +168,6 @@ def validate_pdf(
max_size=constraints.max_size_bytes,
)
# Check page count if constraint specifies it
if constraints.max_pages is not None:
try:
import io
@@ -221,11 +216,10 @@ def validate_audio(
UnsupportedFileTypeError: If the format is not supported.
"""
errors: list[str] = []
content = file.source.read()
content = file.read()
file_size = len(content)
filename = file.filename
# Check file size
if file_size > constraints.max_size_bytes:
msg = (
f"Audio '{filename}' size ({_format_size(file_size)}) exceeds "
@@ -240,7 +234,6 @@ def validate_audio(
max_size=constraints.max_size_bytes,
)
# Check format
content_type = file.content_type
if content_type not in constraints.supported_formats:
msg = (
@@ -277,11 +270,10 @@ def validate_video(
UnsupportedFileTypeError: If the format is not supported.
"""
errors: list[str] = []
content = file.source.read()
content = file.read()
file_size = len(content)
filename = file.filename
# Check file size
if file_size > constraints.max_size_bytes:
msg = (
f"Video '{filename}' size ({_format_size(file_size)}) exceeds "
@@ -296,7 +288,6 @@ def validate_video(
max_size=constraints.max_size_bytes,
)
# Check format
content_type = file.content_type
if content_type not in constraints.supported_formats:
msg = (
@@ -336,7 +327,7 @@ def validate_text(
if constraints.general_max_size_bytes is None:
return errors
content = file.source.read()
content = file.read()
file_size = len(content)
filename = file.filename
@@ -423,5 +414,4 @@ def validate_file(
if isinstance(file, TextFile):
return validate_text(file, constraints, raise_on_error=raise_on_error)
# Unknown file type - can't validate
return []

View File

@@ -80,9 +80,8 @@ class FileResolver:
"""
provider_lower = provider.lower()
constraints = get_constraints_for_provider(provider)
file_size = len(file.source.read())
file_size = len(file.read())
# Determine if we should use file upload
should_upload = self._should_upload(
file, provider_lower, constraints, file_size
)
@@ -91,9 +90,7 @@ class FileResolver:
resolved = self._resolve_via_upload(file, provider_lower)
if resolved is not None:
return resolved
# Fall back to inline if upload fails
# Use inline format
return self._resolve_inline(file, provider_lower)
def resolve_files(
@@ -130,15 +127,12 @@ class FileResolver:
Returns:
True if the file should be uploaded, False otherwise.
"""
# Check if provider supports file upload
if constraints is None or not constraints.supports_file_upload:
return False
# If prefer_upload is set, always prefer upload
if self.config.prefer_upload:
return True
# Check against size threshold
threshold = self.config.upload_threshold_bytes
if threshold is None and constraints is not None:
threshold = constraints.file_upload_threshold_bytes
@@ -162,7 +156,6 @@ class FileResolver:
Returns:
FileReference if upload succeeds, None otherwise.
"""
# Check cache first
if self.upload_cache is not None:
cached = self.upload_cache.get(file, provider)
if cached is not None:
@@ -177,7 +170,6 @@ class FileResolver:
file_uri=cached.file_uri,
)
# Get or create uploader
uploader = self._get_uploader(provider)
if uploader is None:
logger.debug(f"No uploader available for {provider}")
@@ -186,7 +178,6 @@ class FileResolver:
try:
result = uploader.upload(file)
# Cache the result
if self.upload_cache is not None:
self.upload_cache.set(
file=file,
@@ -218,16 +209,14 @@ class FileResolver:
Returns:
InlineBase64 or InlineBytes depending on provider.
"""
content = file.source.read()
content = file.read()
# Use raw bytes for Bedrock if configured
if self.config.use_bytes_for_bedrock and "bedrock" in provider:
return InlineBytes(
content_type=file.content_type,
data=content,
)
# Default to base64
encoded = base64.b64encode(content).decode("ascii")
return InlineBase64(
content_type=file.content_type,

View File

@@ -126,10 +126,6 @@ class UploadCache:
if provider in self._provider_keys:
self._provider_keys[provider].discard(key)
# -------------------------------------------------------------------------
# Async methods (primary interface)
# -------------------------------------------------------------------------
async def aget(self, file: FileInput, provider: str) -> CachedUpload | None:
"""Get a cached upload for a file.
@@ -334,10 +330,6 @@ class UploadCache:
results.append(cached)
return results
# -------------------------------------------------------------------------
# Sync wrappers (convenience)
# -------------------------------------------------------------------------
def _run_sync(self, coro: Any) -> Any:
"""Run an async coroutine from sync context."""
try:
@@ -436,7 +428,6 @@ class UploadCache:
return builtins.set(self._provider_keys.keys())
# Module-level cache instance
_default_cache: UploadCache | None = None
@@ -482,7 +473,6 @@ def _cleanup_on_exit() -> None:
if _default_cache is None or len(_default_cache) == 0:
return
# Import here to avoid circular imports
from crewai.utilities.files.cleanup import cleanup_uploaded_files
try:

View File

@@ -77,18 +77,15 @@ class AnthropicFileUploader(FileUploader):
"""
client = self._get_client()
content = file.source.read()
content = file.read()
file_purpose = purpose or "user_upload"
# Create a file-like object for upload
file_data = io.BytesIO(content)
logger.info(
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
)
# Upload using the anthropic client
# Note: The Anthropic Files API uses a tuple format: (filename, file_obj, content_type)
uploaded_file = client.files.create(
file=(file.filename, file_data, file.content_type),
purpose=file_purpose,
@@ -98,9 +95,9 @@ class AnthropicFileUploader(FileUploader):
return UploadResult(
file_id=uploaded_file.id,
file_uri=None, # Anthropic doesn't provide a URI
file_uri=None,
content_type=file.content_type,
expires_at=None, # Anthropic files don't auto-expire
expires_at=None,
provider=self.provider_name,
)

View File

@@ -23,7 +23,6 @@ logger = logging.getLogger(__name__)
FileInput = AudioFile | File | ImageFile | PDFFile | TextFile | VideoFile
# Gemini files expire after 48 hours
GEMINI_FILE_TTL = timedelta(hours=48)
@@ -80,10 +79,9 @@ class GeminiFileUploader(FileUploader):
"""
client = self._get_client()
content = file.source.read()
content = file.read()
display_name = purpose or file.filename
# Create a file-like object for upload
file_data = io.BytesIO(content)
file_data.name = file.filename
@@ -91,7 +89,6 @@ class GeminiFileUploader(FileUploader):
f"Uploading file '{file.filename}' to Gemini ({len(content)} bytes)"
)
# Upload using the genai client
uploaded_file = client.files.upload(
file=file_data,
config={
@@ -199,7 +196,6 @@ class GeminiFileUploader(FileUploader):
try:
from google.genai.types import FileState
except ImportError:
# If we can't import FileState, just return True
return True
client = self._get_client()

View File

@@ -77,7 +77,7 @@ class OpenAIFileUploader(FileUploader):
"""
client = self._get_client()
content = file.source.read()
content = file.read()
file_purpose = purpose or "user_data"
file_data = io.BytesIO(content)
@@ -96,9 +96,9 @@ class OpenAIFileUploader(FileUploader):
return UploadResult(
file_id=uploaded_file.id,
file_uri=None, # OpenAI doesn't provide a URI
file_uri=None,
content_type=file.content_type,
expires_at=None, # OpenAI files don't auto-expire
expires_at=None,
provider=self.provider_name,
)