From 9cd1e86bd2ffb0ff239ab7ba3593801fcc560fee Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Thu, 2 Jul 2026 14:48:46 -0700 Subject: [PATCH] chore: drop AGE-90 manual dev runner script Removes scripts/age90_file_input_runner.py, a manual development test runner for PDF input handling that was never meant to ship in the repo. Co-Authored-By: Claude Fable 5 --- scripts/age90_file_input_runner.py | 226 ----------------------------- 1 file changed, 226 deletions(-) delete mode 100644 scripts/age90_file_input_runner.py diff --git a/scripts/age90_file_input_runner.py b/scripts/age90_file_input_runner.py deleted file mode 100644 index 3f97267da..000000000 --- a/scripts/age90_file_input_runner.py +++ /dev/null @@ -1,226 +0,0 @@ -# ruff: noqa: T201 -"""Manual runner for AGE-90 PDF input handling. - -Usage examples: - uv run python scripts/age90_file_input_runner.py - uv run python scripts/age90_file_input_runner.py --mode fallback - uv run python scripts/age90_file_input_runner.py --mode payload --pdf ./sample_story.pdf - uv run python scripts/age90_file_input_runner.py --mode kickoff --pdf ./sample_story.pdf -""" - -from __future__ import annotations - -import argparse -from collections.abc import Mapping, Sequence -from contextlib import nullcontext -import os -from pathlib import Path -from typing import Any -from unittest.mock import patch - -from crewai_files import PDFFile, format_multimodal_content, get_supported_content_types - - -ROOT = Path(__file__).resolve().parents[1] -DEFAULT_PDF = ROOT / "lib" / "crewai-files" / "tests" / "fixtures" / "agents.pdf" - - -def _content_summary(block: dict[str, Any]) -> dict[str, str]: - """Return a compact, non-base64 summary of a content block.""" - summary: dict[str, str] = {"type": str(block.get("type"))} - for key in ("file_id", "file_url", "filename", "image_url"): - if key in block: - value = str(block[key]) - summary[key] = value[:100] + ("..." if len(value) > 100 else "") - if "file_data" in block: - value = str(block["file_data"]) - summary["file_data"] = value[:80] + f"... ({len(value)} chars)" - return summary - - -def _sanitize_payload(value: Any) -> Any: - """Shorten large fields before printing API payloads.""" - if isinstance(value, Mapping): - sanitized: dict[str, Any] = {} - for key, item in value.items(): - if key == "file_data" and isinstance(item, str): - sanitized[key] = item[:100] + f"... ({len(item)} chars)" - else: - sanitized[str(key)] = _sanitize_payload(item) - return sanitized - - if isinstance(value, Sequence) and not isinstance(value, str | bytes): - return [_sanitize_payload(item) for item in value] - - return value - - -def inspect_native_path(pdf_path: Path, provider: str, api: str | None) -> None: - """Show whether the PDF is treated as a native multimodal input.""" - pdf = PDFFile(source=str(pdf_path)) - supported_types = get_supported_content_types(provider, api=api) - blocks = format_multimodal_content( - {"document": pdf}, - provider=provider, - api=api, - text="Summarize this PDF.", - ) - - print("\n== Native File Formatting ==") - print(f"PDF: {pdf_path}") - print(f"Provider/API: {provider} / {api or 'default'}") - print(f"Supported content types: {supported_types}") - print(f"Content block count: {len(blocks)}") - for index, block in enumerate(blocks, start=1): - print(f" {index}. {_content_summary(block)}") - - has_pdf_block = any(block.get("type") == "input_file" for block in blocks) - print(f"PDF native input_file block: {'YES' if has_pdf_block else 'NO'}") - - -def inspect_fallback_tool(pdf_path: Path) -> None: - """Show what read_file returns if a PDF falls back to the tool path.""" - from crewai.tools.agent_tools.read_file_tool import ReadFileTool - - tool = ReadFileTool() - tool.set_files({"document": PDFFile(source=str(pdf_path))}) - result = tool._run("document") - - print("\n== read_file Fallback ==") - print(f"Returned {len(result)} chars") - print(f"Contains Base64 marker: {'YES' if 'Base64:' in result else 'NO'}") - print("\nPreview:") - print(result[:1200]) - if len(result) > 1200: - print("...") - - -def run_crew_kickoff( - pdf_path: Path, - model: str, - api: str | None, - prompt: str, - *, - payload_only: bool = False, -) -> None: - """Run a real Crew kickoff against the supplied model.""" - from crewai import LLM, Agent, Crew, Task - - if model.startswith("openai/") and not os.getenv("OPENAI_API_KEY") and not payload_only: - raise SystemExit( - "OPENAI_API_KEY is not set. Export it before running --mode kickoff." - ) - - kwargs: dict[str, Any] = {"model": model, "temperature": 0} - if api: - kwargs["api"] = api - - llm = LLM(**kwargs) - agent = Agent( - role="PDF Analyst", - goal="Read the provided PDF and answer accurately from its contents", - backstory="You inspect uploaded files carefully and avoid guessing.", - llm=llm, - verbose=True, - ) - task = Task( - description=prompt, - expected_output="A concise answer grounded in the uploaded PDF.", - agent=agent, - ) - crew = Crew(agents=[agent], tasks=[task], verbose=True) - - print("\n== Crew Kickoff ==") - print(f"Model/API: {model} / {api or 'default'}") - print(f"PDF: {pdf_path}") - - context = nullcontext() - if payload_only: - from crewai.llms.providers.openai.completion import OpenAICompletion - - def print_payload_and_stop( - self: OpenAICompletion, - params: dict[str, Any], - *_args: Any, - **_kwargs: Any, - ) -> str: - print("\n== Sanitized Responses Payload ==") - print(_sanitize_payload(params)) - return "Payload debug complete." - - context = patch.object( - OpenAICompletion, - "_handle_responses", - print_payload_and_stop, - ) - - with context: - result = crew.kickoff(input_files={"document": PDFFile(source=str(pdf_path))}) - - print("\n== Final Output ==") - print(result.raw) - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--mode", - choices=("inspect", "fallback", "payload", "kickoff", "all"), - default="inspect", - help="What to run. 'inspect', 'fallback', and 'payload' do not call an LLM.", - ) - parser.add_argument( - "--pdf", - type=Path, - default=DEFAULT_PDF, - help="PDF file to test.", - ) - parser.add_argument( - "--provider", - default="gpt-4o-mini", - help="Provider/model string for file formatting inspection.", - ) - parser.add_argument( - "--model", - default="openai/gpt-4o-mini", - help="CrewAI model for real kickoff mode.", - ) - parser.add_argument( - "--api", - default="responses", - help="API variant. Use '' to omit.", - ) - parser.add_argument( - "--prompt", - default="Summarize the uploaded PDF in 3 bullet points. Do not guess.", - help="Task prompt for kickoff mode.", - ) - return parser.parse_args() - - -def main() -> None: - args = parse_args() - pdf_path = args.pdf.expanduser().resolve() - api = args.api or None - - if not pdf_path.exists(): - raise SystemExit(f"PDF not found: {pdf_path}") - - if args.mode in ("inspect", "all"): - inspect_native_path(pdf_path, args.provider, api) - if args.mode in ("fallback", "all"): - inspect_fallback_tool(pdf_path) - if args.mode == "payload": - run_crew_kickoff(pdf_path, args.model, api, args.prompt, payload_only=True) - if args.mode in ("kickoff", "all"): - run_crew_kickoff( - pdf_path, - args.model, - api, - args.prompt, - payload_only=args.mode == "all", - ) - - -if __name__ == "__main__": - main()