mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-02 07:42:40 +00:00
chore: remove unnecessary comments and fix type errors
- Remove unnecessary block and inline comments from file utilities
- Fix mypy errors by using file.read() instead of file.source.read()
This commit is contained in:
@@ -64,7 +64,6 @@ class ReadFileTool(BaseTool):
|
|||||||
content_type = file_input.content_type
|
content_type = file_input.content_type
|
||||||
filename = file_input.filename or file_name
|
filename = file_input.filename or file_name
|
||||||
|
|
||||||
# Text-based content types
|
|
||||||
text_types = (
|
text_types = (
|
||||||
"text/",
|
"text/",
|
||||||
"application/json",
|
"application/json",
|
||||||
@@ -75,6 +74,5 @@ class ReadFileTool(BaseTool):
|
|||||||
if any(content_type.startswith(t) for t in text_types):
|
if any(content_type.startswith(t) for t in text_types):
|
||||||
return content.decode("utf-8")
|
return content.decode("utf-8")
|
||||||
|
|
||||||
# Binary content - return base64 encoded
|
|
||||||
encoded = base64.b64encode(content).decode("ascii")
|
encoded = base64.b64encode(content).decode("ascii")
|
||||||
return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}"
|
return f"[Binary file: {filename} ({content_type})]\nBase64: {encoded}"
|
||||||
|
|||||||
@@ -34,18 +34,15 @@ def _run_sync(coro: Coroutine[None, None, T]) -> T:
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
asyncio.get_running_loop()
|
asyncio.get_running_loop()
|
||||||
# We're in an async context - run in a thread pool
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||||
future = executor.submit(asyncio.run, coro)
|
future = executor.submit(asyncio.run, coro)
|
||||||
return future.result()
|
return future.result()
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
# No running loop - safe to create one
|
|
||||||
return asyncio.run(coro)
|
return asyncio.run(coro)
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_TTL = 3600
|
DEFAULT_TTL = 3600
|
||||||
|
|
||||||
# Key prefixes for different scopes
|
|
||||||
_CREW_PREFIX = "crew:"
|
_CREW_PREFIX = "crew:"
|
||||||
_TASK_PREFIX = "task:"
|
_TASK_PREFIX = "task:"
|
||||||
|
|
||||||
@@ -149,7 +146,6 @@ async def aget_all_files(
|
|||||||
if not crew_files and not task_files:
|
if not crew_files and not task_files:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Merge with task files taking precedence
|
|
||||||
return {**crew_files, **(task_files or {})}
|
return {**crew_files, **(task_files or {})}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -96,7 +96,6 @@ def wrap_file_source(source: FileSource) -> FileInput:
|
|||||||
return VideoFile(source=source)
|
return VideoFile(source=source)
|
||||||
if content_type == "application/pdf":
|
if content_type == "application/pdf":
|
||||||
return PDFFile(source=source)
|
return PDFFile(source=source)
|
||||||
# Default to text for anything else
|
|
||||||
return TextFile(source=source)
|
return TextFile(source=source)
|
||||||
|
|
||||||
|
|
||||||
@@ -116,10 +115,8 @@ def normalize_input_files(
|
|||||||
result: dict[str, FileInput] = {}
|
result: dict[str, FileInput] = {}
|
||||||
|
|
||||||
for i, item in enumerate(input_files):
|
for i, item in enumerate(input_files):
|
||||||
# If it's already a typed File wrapper, use it directly
|
|
||||||
if isinstance(item, BaseFile):
|
if isinstance(item, BaseFile):
|
||||||
name = item.filename or f"file_{i}"
|
name = item.filename or f"file_{i}"
|
||||||
# Remove extension from name for cleaner keys
|
|
||||||
if "." in name:
|
if "." in name:
|
||||||
name = name.rsplit(".", 1)[0]
|
name = name.rsplit(".", 1)[0]
|
||||||
result[name] = item
|
result[name] = item
|
||||||
|
|||||||
@@ -108,45 +108,38 @@ class ProviderConstraints:
|
|||||||
file_upload_threshold_bytes: int | None = None
|
file_upload_threshold_bytes: int | None = None
|
||||||
|
|
||||||
|
|
||||||
# Anthropic constraints (Claude 3+)
|
|
||||||
# https://docs.anthropic.com/en/docs/build-with-claude/vision
|
|
||||||
ANTHROPIC_CONSTRAINTS = ProviderConstraints(
|
ANTHROPIC_CONSTRAINTS = ProviderConstraints(
|
||||||
name="anthropic",
|
name="anthropic",
|
||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=5 * 1024 * 1024, # 5MB
|
max_size_bytes=5 * 1024 * 1024,
|
||||||
max_width=8000,
|
max_width=8000,
|
||||||
max_height=8000,
|
max_height=8000,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
||||||
),
|
),
|
||||||
pdf=PDFConstraints(
|
pdf=PDFConstraints(
|
||||||
max_size_bytes=30 * 1024 * 1024, # 30MB
|
max_size_bytes=30 * 1024 * 1024,
|
||||||
max_pages=100,
|
max_pages=100,
|
||||||
),
|
),
|
||||||
supports_file_upload=True,
|
supports_file_upload=True,
|
||||||
file_upload_threshold_bytes=5 * 1024 * 1024, # Use upload for files > 5MB
|
file_upload_threshold_bytes=5 * 1024 * 1024,
|
||||||
)
|
)
|
||||||
|
|
||||||
# OpenAI constraints (GPT-4o, GPT-4 Vision)
|
|
||||||
# https://platform.openai.com/docs/guides/vision
|
|
||||||
OPENAI_CONSTRAINTS = ProviderConstraints(
|
OPENAI_CONSTRAINTS = ProviderConstraints(
|
||||||
name="openai",
|
name="openai",
|
||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=20 * 1024 * 1024, # 20MB
|
max_size_bytes=20 * 1024 * 1024,
|
||||||
max_images_per_request=10,
|
max_images_per_request=10,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
||||||
),
|
),
|
||||||
# OpenAI does not support PDFs natively
|
|
||||||
pdf=None,
|
pdf=None,
|
||||||
supports_file_upload=True,
|
supports_file_upload=True,
|
||||||
file_upload_threshold_bytes=5 * 1024 * 1024, # Use upload for files > 5MB
|
file_upload_threshold_bytes=5 * 1024 * 1024,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Gemini constraints
|
|
||||||
# https://ai.google.dev/gemini-api/docs/vision
|
|
||||||
GEMINI_CONSTRAINTS = ProviderConstraints(
|
GEMINI_CONSTRAINTS = ProviderConstraints(
|
||||||
name="gemini",
|
name="gemini",
|
||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=100 * 1024 * 1024, # 100MB inline
|
max_size_bytes=100 * 1024 * 1024,
|
||||||
supported_formats=(
|
supported_formats=(
|
||||||
"image/png",
|
"image/png",
|
||||||
"image/jpeg",
|
"image/jpeg",
|
||||||
@@ -157,10 +150,10 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
pdf=PDFConstraints(
|
pdf=PDFConstraints(
|
||||||
max_size_bytes=50 * 1024 * 1024, # 50MB inline
|
max_size_bytes=50 * 1024 * 1024,
|
||||||
),
|
),
|
||||||
audio=AudioConstraints(
|
audio=AudioConstraints(
|
||||||
max_size_bytes=100 * 1024 * 1024, # 100MB
|
max_size_bytes=100 * 1024 * 1024,
|
||||||
supported_formats=(
|
supported_formats=(
|
||||||
"audio/mp3",
|
"audio/mp3",
|
||||||
"audio/mpeg",
|
"audio/mpeg",
|
||||||
@@ -173,7 +166,7 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
video=VideoConstraints(
|
video=VideoConstraints(
|
||||||
max_size_bytes=2 * 1024 * 1024 * 1024, # 2GB via File API
|
max_size_bytes=2 * 1024 * 1024 * 1024,
|
||||||
supported_formats=(
|
supported_formats=(
|
||||||
"video/mp4",
|
"video/mp4",
|
||||||
"video/mpeg",
|
"video/mpeg",
|
||||||
@@ -184,30 +177,27 @@ GEMINI_CONSTRAINTS = ProviderConstraints(
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
supports_file_upload=True,
|
supports_file_upload=True,
|
||||||
file_upload_threshold_bytes=20 * 1024 * 1024, # Use upload for files > 20MB
|
file_upload_threshold_bytes=20 * 1024 * 1024,
|
||||||
)
|
)
|
||||||
|
|
||||||
# AWS Bedrock constraints (Claude via Bedrock)
|
|
||||||
# https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
|
|
||||||
BEDROCK_CONSTRAINTS = ProviderConstraints(
|
BEDROCK_CONSTRAINTS = ProviderConstraints(
|
||||||
name="bedrock",
|
name="bedrock",
|
||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=4_608_000, # ~4.5MB (encoded size limit)
|
max_size_bytes=4_608_000,
|
||||||
max_width=8000,
|
max_width=8000,
|
||||||
max_height=8000,
|
max_height=8000,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
||||||
),
|
),
|
||||||
pdf=PDFConstraints(
|
pdf=PDFConstraints(
|
||||||
max_size_bytes=3_840_000, # ~3.75MB
|
max_size_bytes=3_840_000,
|
||||||
max_pages=100,
|
max_pages=100,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Azure OpenAI constraints (same as OpenAI)
|
|
||||||
AZURE_CONSTRAINTS = ProviderConstraints(
|
AZURE_CONSTRAINTS = ProviderConstraints(
|
||||||
name="azure",
|
name="azure",
|
||||||
image=ImageConstraints(
|
image=ImageConstraints(
|
||||||
max_size_bytes=20 * 1024 * 1024, # 20MB
|
max_size_bytes=20 * 1024 * 1024,
|
||||||
max_images_per_request=10,
|
max_images_per_request=10,
|
||||||
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
supported_formats=("image/png", "image/jpeg", "image/gif", "image/webp"),
|
||||||
),
|
),
|
||||||
@@ -215,14 +205,12 @@ AZURE_CONSTRAINTS = ProviderConstraints(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Provider name mapping for convenience
|
|
||||||
_PROVIDER_CONSTRAINTS_MAP: dict[str, ProviderConstraints] = {
|
_PROVIDER_CONSTRAINTS_MAP: dict[str, ProviderConstraints] = {
|
||||||
"anthropic": ANTHROPIC_CONSTRAINTS,
|
"anthropic": ANTHROPIC_CONSTRAINTS,
|
||||||
"openai": OPENAI_CONSTRAINTS,
|
"openai": OPENAI_CONSTRAINTS,
|
||||||
"gemini": GEMINI_CONSTRAINTS,
|
"gemini": GEMINI_CONSTRAINTS,
|
||||||
"bedrock": BEDROCK_CONSTRAINTS,
|
"bedrock": BEDROCK_CONSTRAINTS,
|
||||||
"azure": AZURE_CONSTRAINTS,
|
"azure": AZURE_CONSTRAINTS,
|
||||||
# Aliases
|
|
||||||
"claude": ANTHROPIC_CONSTRAINTS,
|
"claude": ANTHROPIC_CONSTRAINTS,
|
||||||
"gpt": OPENAI_CONSTRAINTS,
|
"gpt": OPENAI_CONSTRAINTS,
|
||||||
"google": GEMINI_CONSTRAINTS,
|
"google": GEMINI_CONSTRAINTS,
|
||||||
@@ -246,11 +234,9 @@ def get_constraints_for_provider(
|
|||||||
|
|
||||||
provider_lower = provider.lower()
|
provider_lower = provider.lower()
|
||||||
|
|
||||||
# Direct lookup
|
|
||||||
if provider_lower in _PROVIDER_CONSTRAINTS_MAP:
|
if provider_lower in _PROVIDER_CONSTRAINTS_MAP:
|
||||||
return _PROVIDER_CONSTRAINTS_MAP[provider_lower]
|
return _PROVIDER_CONSTRAINTS_MAP[provider_lower]
|
||||||
|
|
||||||
# Check if provider name contains any known provider
|
|
||||||
for key, constraints in _PROVIDER_CONSTRAINTS_MAP.items():
|
for key, constraints in _PROVIDER_CONSTRAINTS_MAP.items():
|
||||||
if key in provider_lower:
|
if key in provider_lower:
|
||||||
return constraints
|
return constraints
|
||||||
|
|||||||
@@ -125,15 +125,12 @@ class FileProcessor:
|
|||||||
mode = self._get_mode(file)
|
mode = self._get_mode(file)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# First validate
|
|
||||||
errors = self.validate(file)
|
errors = self.validate(file)
|
||||||
|
|
||||||
if not errors:
|
if not errors:
|
||||||
return file
|
return file
|
||||||
|
|
||||||
# Handle based on mode
|
|
||||||
if mode == FileHandling.STRICT:
|
if mode == FileHandling.STRICT:
|
||||||
# Errors should have already raised in validate()
|
|
||||||
raise FileValidationError("; ".join(errors), file_name=file.filename)
|
raise FileValidationError("; ".join(errors), file_name=file.filename)
|
||||||
|
|
||||||
if mode == FileHandling.WARN:
|
if mode == FileHandling.WARN:
|
||||||
@@ -178,7 +175,6 @@ class FileProcessor:
|
|||||||
if isinstance(processed, Sequence) and not isinstance(
|
if isinstance(processed, Sequence) and not isinstance(
|
||||||
processed, (str, bytes)
|
processed, (str, bytes)
|
||||||
):
|
):
|
||||||
# File was chunked - add each chunk with indexed name
|
|
||||||
for i, chunk in enumerate(processed):
|
for i, chunk in enumerate(processed):
|
||||||
chunk_name = f"{name}_chunk_{i}"
|
chunk_name = f"{name}_chunk_{i}"
|
||||||
result[chunk_name] = chunk
|
result[chunk_name] = chunk
|
||||||
@@ -203,15 +199,12 @@ class FileProcessor:
|
|||||||
return self._auto_process_image(file)
|
return self._auto_process_image(file)
|
||||||
|
|
||||||
if isinstance(file, PDFFile) and self.constraints.pdf is not None:
|
if isinstance(file, PDFFile) and self.constraints.pdf is not None:
|
||||||
# PDFs can't easily be auto-compressed, log warning
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Cannot auto-compress PDF '{file.filename}'. "
|
f"Cannot auto-compress PDF '{file.filename}'. "
|
||||||
"Consider using CHUNK mode for large PDFs."
|
"Consider using CHUNK mode for large PDFs."
|
||||||
)
|
)
|
||||||
return file
|
return file
|
||||||
|
|
||||||
# Audio and video auto-processing would require additional dependencies
|
|
||||||
# For now, just warn
|
|
||||||
if isinstance(file, (AudioFile, VideoFile)):
|
if isinstance(file, (AudioFile, VideoFile)):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Auto-processing not supported for {type(file).__name__}. "
|
f"Auto-processing not supported for {type(file).__name__}. "
|
||||||
@@ -235,10 +228,9 @@ class FileProcessor:
|
|||||||
|
|
||||||
image_constraints = self.constraints.image
|
image_constraints = self.constraints.image
|
||||||
processed = file
|
processed = file
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
current_size = len(content)
|
current_size = len(content)
|
||||||
|
|
||||||
# First, resize if dimensions exceed limits
|
|
||||||
if image_constraints.max_width or image_constraints.max_height:
|
if image_constraints.max_width or image_constraints.max_height:
|
||||||
dimensions = get_image_dimensions(file)
|
dimensions = get_image_dimensions(file)
|
||||||
if dimensions:
|
if dimensions:
|
||||||
@@ -249,12 +241,11 @@ class FileProcessor:
|
|||||||
if width > max_w or height > max_h:
|
if width > max_w or height > max_h:
|
||||||
try:
|
try:
|
||||||
processed = resize_image(file, max_w, max_h)
|
processed = resize_image(file, max_w, max_h)
|
||||||
content = processed.source.read()
|
content = processed.read()
|
||||||
current_size = len(content)
|
current_size = len(content)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to resize image: {e}")
|
logger.warning(f"Failed to resize image: {e}")
|
||||||
|
|
||||||
# Then, optimize if size still exceeds limits
|
|
||||||
if current_size > image_constraints.max_size_bytes:
|
if current_size > image_constraints.max_size_bytes:
|
||||||
try:
|
try:
|
||||||
processed = optimize_image(processed, image_constraints.max_size_bytes)
|
processed = optimize_image(processed, image_constraints.max_size_bytes)
|
||||||
@@ -290,7 +281,7 @@ class FileProcessor:
|
|||||||
# Use general max size as character limit approximation
|
# Use general max size as character limit approximation
|
||||||
max_size = self.constraints.general_max_size_bytes
|
max_size = self.constraints.general_max_size_bytes
|
||||||
if max_size is not None:
|
if max_size is not None:
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
if len(content) > max_size:
|
if len(content) > max_size:
|
||||||
try:
|
try:
|
||||||
return chunk_text(file, max_size)
|
return chunk_text(file, max_size)
|
||||||
@@ -298,7 +289,6 @@ class FileProcessor:
|
|||||||
logger.warning(f"Failed to chunk text file: {e}")
|
logger.warning(f"Failed to chunk text file: {e}")
|
||||||
return file
|
return file
|
||||||
|
|
||||||
# For other file types, chunking is not supported
|
|
||||||
if isinstance(file, (ImageFile, AudioFile, VideoFile)):
|
if isinstance(file, (ImageFile, AudioFile, VideoFile)):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Chunking not supported for {type(file).__name__}. "
|
f"Chunking not supported for {type(file).__name__}. "
|
||||||
|
|||||||
@@ -42,17 +42,15 @@ def resize_image(
|
|||||||
install_command="pip install Pillow",
|
install_command="pip install Pillow",
|
||||||
) from e
|
) from e
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
|
|
||||||
with Image.open(io.BytesIO(content)) as img:
|
with Image.open(io.BytesIO(content)) as img:
|
||||||
original_width, original_height = img.size
|
original_width, original_height = img.size
|
||||||
|
|
||||||
# Check if resize is needed
|
|
||||||
if original_width <= max_width and original_height <= max_height:
|
if original_width <= max_width and original_height <= max_height:
|
||||||
return file
|
return file
|
||||||
|
|
||||||
if preserve_aspect_ratio:
|
if preserve_aspect_ratio:
|
||||||
# Calculate scaling factor to fit within bounds
|
|
||||||
width_ratio = max_width / original_width
|
width_ratio = max_width / original_width
|
||||||
height_ratio = max_height / original_height
|
height_ratio = max_height / original_height
|
||||||
scale_factor = min(width_ratio, height_ratio)
|
scale_factor = min(width_ratio, height_ratio)
|
||||||
@@ -63,17 +61,13 @@ def resize_image(
|
|||||||
new_width = min(original_width, max_width)
|
new_width = min(original_width, max_width)
|
||||||
new_height = min(original_height, max_height)
|
new_height = min(original_height, max_height)
|
||||||
|
|
||||||
# Resize the image
|
|
||||||
resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
||||||
|
|
||||||
# Determine output format
|
|
||||||
output_format = img.format or "PNG"
|
output_format = img.format or "PNG"
|
||||||
if output_format.upper() == "JPEG":
|
if output_format.upper() == "JPEG":
|
||||||
# Handle RGBA images for JPEG
|
|
||||||
if resized_img.mode in ("RGBA", "LA", "P"):
|
if resized_img.mode in ("RGBA", "LA", "P"):
|
||||||
resized_img = resized_img.convert("RGB")
|
resized_img = resized_img.convert("RGB")
|
||||||
|
|
||||||
# Save to bytes
|
|
||||||
output_buffer = io.BytesIO()
|
output_buffer = io.BytesIO()
|
||||||
resized_img.save(output_buffer, format=output_format)
|
resized_img.save(output_buffer, format=output_format)
|
||||||
output_bytes = output_buffer.getvalue()
|
output_bytes = output_buffer.getvalue()
|
||||||
@@ -118,15 +112,13 @@ def optimize_image(
|
|||||||
install_command="pip install Pillow",
|
install_command="pip install Pillow",
|
||||||
) from e
|
) from e
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
current_size = len(content)
|
current_size = len(content)
|
||||||
|
|
||||||
# If already within target, return as-is
|
|
||||||
if current_size <= target_size_bytes:
|
if current_size <= target_size_bytes:
|
||||||
return file
|
return file
|
||||||
|
|
||||||
with Image.open(io.BytesIO(content)) as img:
|
with Image.open(io.BytesIO(content)) as img:
|
||||||
# Convert to RGB for JPEG compression if needed
|
|
||||||
if img.mode in ("RGBA", "LA", "P"):
|
if img.mode in ("RGBA", "LA", "P"):
|
||||||
img = img.convert("RGB")
|
img = img.convert("RGB")
|
||||||
output_format = "JPEG"
|
output_format = "JPEG"
|
||||||
@@ -138,7 +130,6 @@ def optimize_image(
|
|||||||
quality = initial_quality
|
quality = initial_quality
|
||||||
output_bytes = content
|
output_bytes = content
|
||||||
|
|
||||||
# Binary search for optimal quality
|
|
||||||
while len(output_bytes) > target_size_bytes and quality >= min_quality:
|
while len(output_bytes) > target_size_bytes and quality >= min_quality:
|
||||||
output_buffer = io.BytesIO()
|
output_buffer = io.BytesIO()
|
||||||
img.save(
|
img.save(
|
||||||
@@ -193,11 +184,10 @@ def chunk_pdf(
|
|||||||
install_command="pip install pypdf",
|
install_command="pip install pypdf",
|
||||||
) from e
|
) from e
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
reader = PdfReader(io.BytesIO(content))
|
reader = PdfReader(io.BytesIO(content))
|
||||||
total_pages = len(reader.pages)
|
total_pages = len(reader.pages)
|
||||||
|
|
||||||
# If within limit, return as-is
|
|
||||||
if total_pages <= max_pages:
|
if total_pages <= max_pages:
|
||||||
return [file]
|
return [file]
|
||||||
|
|
||||||
@@ -253,11 +243,10 @@ def chunk_text(
|
|||||||
Returns:
|
Returns:
|
||||||
List of TextFile objects, one per chunk.
|
List of TextFile objects, one per chunk.
|
||||||
"""
|
"""
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
text = content.decode("utf-8", errors="replace")
|
text = content.decode("utf-8", errors="replace")
|
||||||
total_chars = len(text)
|
total_chars = len(text)
|
||||||
|
|
||||||
# If within limit, return as-is
|
|
||||||
if total_chars <= max_chars:
|
if total_chars <= max_chars:
|
||||||
return [file]
|
return [file]
|
||||||
|
|
||||||
@@ -291,7 +280,6 @@ def chunk_text(
|
|||||||
f"Created text chunk '{chunk_filename}' with {len(chunk_text)} characters"
|
f"Created text chunk '{chunk_filename}' with {len(chunk_text)} characters"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Move start position with overlap
|
|
||||||
start_pos = end_pos - overlap_chars if end_pos < total_chars else total_chars
|
start_pos = end_pos - overlap_chars if end_pos < total_chars else total_chars
|
||||||
chunk_num += 1
|
chunk_num += 1
|
||||||
|
|
||||||
@@ -313,7 +301,7 @@ def get_image_dimensions(file: ImageFile) -> tuple[int, int] | None:
|
|||||||
logger.warning("Pillow not installed - cannot get image dimensions")
|
logger.warning("Pillow not installed - cannot get image dimensions")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with Image.open(io.BytesIO(content)) as img:
|
with Image.open(io.BytesIO(content)) as img:
|
||||||
@@ -339,7 +327,7 @@ def get_pdf_page_count(file: PDFFile) -> int | None:
|
|||||||
logger.warning("pypdf not installed - cannot get PDF page count")
|
logger.warning("pypdf not installed - cannot get PDF page count")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
reader = PdfReader(io.BytesIO(content))
|
reader = PdfReader(io.BytesIO(content))
|
||||||
|
|||||||
@@ -63,11 +63,10 @@ def validate_image(
|
|||||||
UnsupportedFileTypeError: If the format is not supported.
|
UnsupportedFileTypeError: If the format is not supported.
|
||||||
"""
|
"""
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
file_size = len(content)
|
file_size = len(content)
|
||||||
filename = file.filename
|
filename = file.filename
|
||||||
|
|
||||||
# Check file size
|
|
||||||
if file_size > constraints.max_size_bytes:
|
if file_size > constraints.max_size_bytes:
|
||||||
msg = (
|
msg = (
|
||||||
f"Image '{filename}' size ({_format_size(file_size)}) exceeds "
|
f"Image '{filename}' size ({_format_size(file_size)}) exceeds "
|
||||||
@@ -82,7 +81,6 @@ def validate_image(
|
|||||||
max_size=constraints.max_size_bytes,
|
max_size=constraints.max_size_bytes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check format
|
|
||||||
content_type = file.content_type
|
content_type = file.content_type
|
||||||
if content_type not in constraints.supported_formats:
|
if content_type not in constraints.supported_formats:
|
||||||
msg = (
|
msg = (
|
||||||
@@ -95,7 +93,6 @@ def validate_image(
|
|||||||
msg, file_name=filename, content_type=content_type
|
msg, file_name=filename, content_type=content_type
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check dimensions if constraints specify them
|
|
||||||
if constraints.max_width is not None or constraints.max_height is not None:
|
if constraints.max_width is not None or constraints.max_height is not None:
|
||||||
try:
|
try:
|
||||||
import io
|
import io
|
||||||
@@ -153,11 +150,10 @@ def validate_pdf(
|
|||||||
FileValidationError: If the file exceeds page limits.
|
FileValidationError: If the file exceeds page limits.
|
||||||
"""
|
"""
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
file_size = len(content)
|
file_size = len(content)
|
||||||
filename = file.filename
|
filename = file.filename
|
||||||
|
|
||||||
# Check file size
|
|
||||||
if file_size > constraints.max_size_bytes:
|
if file_size > constraints.max_size_bytes:
|
||||||
msg = (
|
msg = (
|
||||||
f"PDF '{filename}' size ({_format_size(file_size)}) exceeds "
|
f"PDF '{filename}' size ({_format_size(file_size)}) exceeds "
|
||||||
@@ -172,7 +168,6 @@ def validate_pdf(
|
|||||||
max_size=constraints.max_size_bytes,
|
max_size=constraints.max_size_bytes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check page count if constraint specifies it
|
|
||||||
if constraints.max_pages is not None:
|
if constraints.max_pages is not None:
|
||||||
try:
|
try:
|
||||||
import io
|
import io
|
||||||
@@ -221,11 +216,10 @@ def validate_audio(
|
|||||||
UnsupportedFileTypeError: If the format is not supported.
|
UnsupportedFileTypeError: If the format is not supported.
|
||||||
"""
|
"""
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
file_size = len(content)
|
file_size = len(content)
|
||||||
filename = file.filename
|
filename = file.filename
|
||||||
|
|
||||||
# Check file size
|
|
||||||
if file_size > constraints.max_size_bytes:
|
if file_size > constraints.max_size_bytes:
|
||||||
msg = (
|
msg = (
|
||||||
f"Audio '{filename}' size ({_format_size(file_size)}) exceeds "
|
f"Audio '{filename}' size ({_format_size(file_size)}) exceeds "
|
||||||
@@ -240,7 +234,6 @@ def validate_audio(
|
|||||||
max_size=constraints.max_size_bytes,
|
max_size=constraints.max_size_bytes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check format
|
|
||||||
content_type = file.content_type
|
content_type = file.content_type
|
||||||
if content_type not in constraints.supported_formats:
|
if content_type not in constraints.supported_formats:
|
||||||
msg = (
|
msg = (
|
||||||
@@ -277,11 +270,10 @@ def validate_video(
|
|||||||
UnsupportedFileTypeError: If the format is not supported.
|
UnsupportedFileTypeError: If the format is not supported.
|
||||||
"""
|
"""
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
file_size = len(content)
|
file_size = len(content)
|
||||||
filename = file.filename
|
filename = file.filename
|
||||||
|
|
||||||
# Check file size
|
|
||||||
if file_size > constraints.max_size_bytes:
|
if file_size > constraints.max_size_bytes:
|
||||||
msg = (
|
msg = (
|
||||||
f"Video '{filename}' size ({_format_size(file_size)}) exceeds "
|
f"Video '{filename}' size ({_format_size(file_size)}) exceeds "
|
||||||
@@ -296,7 +288,6 @@ def validate_video(
|
|||||||
max_size=constraints.max_size_bytes,
|
max_size=constraints.max_size_bytes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check format
|
|
||||||
content_type = file.content_type
|
content_type = file.content_type
|
||||||
if content_type not in constraints.supported_formats:
|
if content_type not in constraints.supported_formats:
|
||||||
msg = (
|
msg = (
|
||||||
@@ -336,7 +327,7 @@ def validate_text(
|
|||||||
if constraints.general_max_size_bytes is None:
|
if constraints.general_max_size_bytes is None:
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
file_size = len(content)
|
file_size = len(content)
|
||||||
filename = file.filename
|
filename = file.filename
|
||||||
|
|
||||||
@@ -423,5 +414,4 @@ def validate_file(
|
|||||||
if isinstance(file, TextFile):
|
if isinstance(file, TextFile):
|
||||||
return validate_text(file, constraints, raise_on_error=raise_on_error)
|
return validate_text(file, constraints, raise_on_error=raise_on_error)
|
||||||
|
|
||||||
# Unknown file type - can't validate
|
|
||||||
return []
|
return []
|
||||||
|
|||||||
@@ -80,9 +80,8 @@ class FileResolver:
|
|||||||
"""
|
"""
|
||||||
provider_lower = provider.lower()
|
provider_lower = provider.lower()
|
||||||
constraints = get_constraints_for_provider(provider)
|
constraints = get_constraints_for_provider(provider)
|
||||||
file_size = len(file.source.read())
|
file_size = len(file.read())
|
||||||
|
|
||||||
# Determine if we should use file upload
|
|
||||||
should_upload = self._should_upload(
|
should_upload = self._should_upload(
|
||||||
file, provider_lower, constraints, file_size
|
file, provider_lower, constraints, file_size
|
||||||
)
|
)
|
||||||
@@ -91,9 +90,7 @@ class FileResolver:
|
|||||||
resolved = self._resolve_via_upload(file, provider_lower)
|
resolved = self._resolve_via_upload(file, provider_lower)
|
||||||
if resolved is not None:
|
if resolved is not None:
|
||||||
return resolved
|
return resolved
|
||||||
# Fall back to inline if upload fails
|
|
||||||
|
|
||||||
# Use inline format
|
|
||||||
return self._resolve_inline(file, provider_lower)
|
return self._resolve_inline(file, provider_lower)
|
||||||
|
|
||||||
def resolve_files(
|
def resolve_files(
|
||||||
@@ -130,15 +127,12 @@ class FileResolver:
|
|||||||
Returns:
|
Returns:
|
||||||
True if the file should be uploaded, False otherwise.
|
True if the file should be uploaded, False otherwise.
|
||||||
"""
|
"""
|
||||||
# Check if provider supports file upload
|
|
||||||
if constraints is None or not constraints.supports_file_upload:
|
if constraints is None or not constraints.supports_file_upload:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# If prefer_upload is set, always prefer upload
|
|
||||||
if self.config.prefer_upload:
|
if self.config.prefer_upload:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Check against size threshold
|
|
||||||
threshold = self.config.upload_threshold_bytes
|
threshold = self.config.upload_threshold_bytes
|
||||||
if threshold is None and constraints is not None:
|
if threshold is None and constraints is not None:
|
||||||
threshold = constraints.file_upload_threshold_bytes
|
threshold = constraints.file_upload_threshold_bytes
|
||||||
@@ -162,7 +156,6 @@ class FileResolver:
|
|||||||
Returns:
|
Returns:
|
||||||
FileReference if upload succeeds, None otherwise.
|
FileReference if upload succeeds, None otherwise.
|
||||||
"""
|
"""
|
||||||
# Check cache first
|
|
||||||
if self.upload_cache is not None:
|
if self.upload_cache is not None:
|
||||||
cached = self.upload_cache.get(file, provider)
|
cached = self.upload_cache.get(file, provider)
|
||||||
if cached is not None:
|
if cached is not None:
|
||||||
@@ -177,7 +170,6 @@ class FileResolver:
|
|||||||
file_uri=cached.file_uri,
|
file_uri=cached.file_uri,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get or create uploader
|
|
||||||
uploader = self._get_uploader(provider)
|
uploader = self._get_uploader(provider)
|
||||||
if uploader is None:
|
if uploader is None:
|
||||||
logger.debug(f"No uploader available for {provider}")
|
logger.debug(f"No uploader available for {provider}")
|
||||||
@@ -186,7 +178,6 @@ class FileResolver:
|
|||||||
try:
|
try:
|
||||||
result = uploader.upload(file)
|
result = uploader.upload(file)
|
||||||
|
|
||||||
# Cache the result
|
|
||||||
if self.upload_cache is not None:
|
if self.upload_cache is not None:
|
||||||
self.upload_cache.set(
|
self.upload_cache.set(
|
||||||
file=file,
|
file=file,
|
||||||
@@ -218,16 +209,14 @@ class FileResolver:
|
|||||||
Returns:
|
Returns:
|
||||||
InlineBase64 or InlineBytes depending on provider.
|
InlineBase64 or InlineBytes depending on provider.
|
||||||
"""
|
"""
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
|
|
||||||
# Use raw bytes for Bedrock if configured
|
|
||||||
if self.config.use_bytes_for_bedrock and "bedrock" in provider:
|
if self.config.use_bytes_for_bedrock and "bedrock" in provider:
|
||||||
return InlineBytes(
|
return InlineBytes(
|
||||||
content_type=file.content_type,
|
content_type=file.content_type,
|
||||||
data=content,
|
data=content,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Default to base64
|
|
||||||
encoded = base64.b64encode(content).decode("ascii")
|
encoded = base64.b64encode(content).decode("ascii")
|
||||||
return InlineBase64(
|
return InlineBase64(
|
||||||
content_type=file.content_type,
|
content_type=file.content_type,
|
||||||
|
|||||||
@@ -126,10 +126,6 @@ class UploadCache:
|
|||||||
if provider in self._provider_keys:
|
if provider in self._provider_keys:
|
||||||
self._provider_keys[provider].discard(key)
|
self._provider_keys[provider].discard(key)
|
||||||
|
|
||||||
# -------------------------------------------------------------------------
|
|
||||||
# Async methods (primary interface)
|
|
||||||
# -------------------------------------------------------------------------
|
|
||||||
|
|
||||||
async def aget(self, file: FileInput, provider: str) -> CachedUpload | None:
|
async def aget(self, file: FileInput, provider: str) -> CachedUpload | None:
|
||||||
"""Get a cached upload for a file.
|
"""Get a cached upload for a file.
|
||||||
|
|
||||||
@@ -334,10 +330,6 @@ class UploadCache:
|
|||||||
results.append(cached)
|
results.append(cached)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
# -------------------------------------------------------------------------
|
|
||||||
# Sync wrappers (convenience)
|
|
||||||
# -------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _run_sync(self, coro: Any) -> Any:
|
def _run_sync(self, coro: Any) -> Any:
|
||||||
"""Run an async coroutine from sync context."""
|
"""Run an async coroutine from sync context."""
|
||||||
try:
|
try:
|
||||||
@@ -436,7 +428,6 @@ class UploadCache:
|
|||||||
return builtins.set(self._provider_keys.keys())
|
return builtins.set(self._provider_keys.keys())
|
||||||
|
|
||||||
|
|
||||||
# Module-level cache instance
|
|
||||||
_default_cache: UploadCache | None = None
|
_default_cache: UploadCache | None = None
|
||||||
|
|
||||||
|
|
||||||
@@ -482,7 +473,6 @@ def _cleanup_on_exit() -> None:
|
|||||||
if _default_cache is None or len(_default_cache) == 0:
|
if _default_cache is None or len(_default_cache) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Import here to avoid circular imports
|
|
||||||
from crewai.utilities.files.cleanup import cleanup_uploaded_files
|
from crewai.utilities.files.cleanup import cleanup_uploaded_files
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -77,18 +77,15 @@ class AnthropicFileUploader(FileUploader):
|
|||||||
"""
|
"""
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
file_purpose = purpose or "user_upload"
|
file_purpose = purpose or "user_upload"
|
||||||
|
|
||||||
# Create a file-like object for upload
|
|
||||||
file_data = io.BytesIO(content)
|
file_data = io.BytesIO(content)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
|
f"Uploading file '{file.filename}' to Anthropic ({len(content)} bytes)"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Upload using the anthropic client
|
|
||||||
# Note: The Anthropic Files API uses a tuple format: (filename, file_obj, content_type)
|
|
||||||
uploaded_file = client.files.create(
|
uploaded_file = client.files.create(
|
||||||
file=(file.filename, file_data, file.content_type),
|
file=(file.filename, file_data, file.content_type),
|
||||||
purpose=file_purpose,
|
purpose=file_purpose,
|
||||||
@@ -98,9 +95,9 @@ class AnthropicFileUploader(FileUploader):
|
|||||||
|
|
||||||
return UploadResult(
|
return UploadResult(
|
||||||
file_id=uploaded_file.id,
|
file_id=uploaded_file.id,
|
||||||
file_uri=None, # Anthropic doesn't provide a URI
|
file_uri=None,
|
||||||
content_type=file.content_type,
|
content_type=file.content_type,
|
||||||
expires_at=None, # Anthropic files don't auto-expire
|
expires_at=None,
|
||||||
provider=self.provider_name,
|
provider=self.provider_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
FileInput = AudioFile | File | ImageFile | PDFFile | TextFile | VideoFile
|
FileInput = AudioFile | File | ImageFile | PDFFile | TextFile | VideoFile
|
||||||
|
|
||||||
# Gemini files expire after 48 hours
|
|
||||||
GEMINI_FILE_TTL = timedelta(hours=48)
|
GEMINI_FILE_TTL = timedelta(hours=48)
|
||||||
|
|
||||||
|
|
||||||
@@ -80,10 +79,9 @@ class GeminiFileUploader(FileUploader):
|
|||||||
"""
|
"""
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
display_name = purpose or file.filename
|
display_name = purpose or file.filename
|
||||||
|
|
||||||
# Create a file-like object for upload
|
|
||||||
file_data = io.BytesIO(content)
|
file_data = io.BytesIO(content)
|
||||||
file_data.name = file.filename
|
file_data.name = file.filename
|
||||||
|
|
||||||
@@ -91,7 +89,6 @@ class GeminiFileUploader(FileUploader):
|
|||||||
f"Uploading file '{file.filename}' to Gemini ({len(content)} bytes)"
|
f"Uploading file '{file.filename}' to Gemini ({len(content)} bytes)"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Upload using the genai client
|
|
||||||
uploaded_file = client.files.upload(
|
uploaded_file = client.files.upload(
|
||||||
file=file_data,
|
file=file_data,
|
||||||
config={
|
config={
|
||||||
@@ -199,7 +196,6 @@ class GeminiFileUploader(FileUploader):
|
|||||||
try:
|
try:
|
||||||
from google.genai.types import FileState
|
from google.genai.types import FileState
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# If we can't import FileState, just return True
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ class OpenAIFileUploader(FileUploader):
|
|||||||
"""
|
"""
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
|
|
||||||
content = file.source.read()
|
content = file.read()
|
||||||
file_purpose = purpose or "user_data"
|
file_purpose = purpose or "user_data"
|
||||||
|
|
||||||
file_data = io.BytesIO(content)
|
file_data = io.BytesIO(content)
|
||||||
@@ -96,9 +96,9 @@ class OpenAIFileUploader(FileUploader):
|
|||||||
|
|
||||||
return UploadResult(
|
return UploadResult(
|
||||||
file_id=uploaded_file.id,
|
file_id=uploaded_file.id,
|
||||||
file_uri=None, # OpenAI doesn't provide a URI
|
file_uri=None,
|
||||||
content_type=file.content_type,
|
content_type=file.content_type,
|
||||||
expires_at=None, # OpenAI files don't auto-expire
|
expires_at=None,
|
||||||
provider=self.provider_name,
|
provider=self.provider_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user