mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-03 06:08:15 +00:00
chore: drop AGE-90 manual dev runner script
Removes scripts/age90_file_input_runner.py, a manual development test runner for PDF input handling that was never meant to ship in the repo. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,226 +0,0 @@
|
||||
# ruff: noqa: T201
|
||||
"""Manual runner for AGE-90 PDF input handling.
|
||||
|
||||
Usage examples:
|
||||
uv run python scripts/age90_file_input_runner.py
|
||||
uv run python scripts/age90_file_input_runner.py --mode fallback
|
||||
uv run python scripts/age90_file_input_runner.py --mode payload --pdf ./sample_story.pdf
|
||||
uv run python scripts/age90_file_input_runner.py --mode kickoff --pdf ./sample_story.pdf
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from collections.abc import Mapping, Sequence
|
||||
from contextlib import nullcontext
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import patch
|
||||
|
||||
from crewai_files import PDFFile, format_multimodal_content, get_supported_content_types
|
||||
|
||||
|
||||
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# PDF used when --pdf is not supplied on the command line.
DEFAULT_PDF = ROOT.joinpath("lib", "crewai-files", "tests", "fixtures", "agents.pdf")
|
||||
|
||||
|
||||
def _content_summary(block: dict[str, Any]) -> dict[str, str]:
|
||||
"""Return a compact, non-base64 summary of a content block."""
|
||||
summary: dict[str, str] = {"type": str(block.get("type"))}
|
||||
for key in ("file_id", "file_url", "filename", "image_url"):
|
||||
if key in block:
|
||||
value = str(block[key])
|
||||
summary[key] = value[:100] + ("..." if len(value) > 100 else "")
|
||||
if "file_data" in block:
|
||||
value = str(block["file_data"])
|
||||
summary["file_data"] = value[:80] + f"... ({len(value)} chars)"
|
||||
return summary
|
||||
|
||||
|
||||
def _sanitize_payload(value: Any) -> Any:
|
||||
"""Shorten large fields before printing API payloads."""
|
||||
if isinstance(value, Mapping):
|
||||
sanitized: dict[str, Any] = {}
|
||||
for key, item in value.items():
|
||||
if key == "file_data" and isinstance(item, str):
|
||||
sanitized[key] = item[:100] + f"... ({len(item)} chars)"
|
||||
else:
|
||||
sanitized[str(key)] = _sanitize_payload(item)
|
||||
return sanitized
|
||||
|
||||
if isinstance(value, Sequence) and not isinstance(value, str | bytes):
|
||||
return [_sanitize_payload(item) for item in value]
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def inspect_native_path(pdf_path: Path, provider: str, api: str | None) -> None:
    """Print how the PDF is formatted as multimodal content for a provider.

    Wraps ``pdf_path`` in a ``PDFFile``, looks up the supported content
    types for ``provider``/``api``, formats the file together with a fixed
    summary prompt, and prints a compact summary of every resulting block.
    The last line reports whether any block has type ``"input_file"``.

    Args:
        pdf_path: PDF to wrap and format.
        provider: Provider/model string forwarded to the formatting helpers.
        api: Optional API variant forwarded to the formatting helpers.
    """
    document = PDFFile(source=str(pdf_path))
    content_types = get_supported_content_types(provider, api=api)
    content_blocks = format_multimodal_content(
        {"document": document},
        provider=provider,
        api=api,
        text="Summarize this PDF.",
    )

    print("\n== Native File Formatting ==")
    print(f"PDF: {pdf_path}")
    print(f"Provider/API: {provider} / {api or 'default'}")
    print(f"Supported content types: {content_types}")
    print(f"Content block count: {len(content_blocks)}")
    for position, entry in enumerate(content_blocks, start=1):
        print(f"  {position}. {_content_summary(entry)}")

    if any(entry.get("type") == "input_file" for entry in content_blocks):
        verdict = "YES"
    else:
        verdict = "NO"
    print(f"PDF native input_file block: {verdict}")
|
||||
|
||||
|
||||
def inspect_fallback_tool(pdf_path: Path) -> None:
    """Print what ``ReadFileTool`` returns when asked for the PDF.

    Registers the PDF with the tool under the name ``"document"``, runs
    the tool for that name, and prints the result length, whether the
    result contains the ``Base64:`` marker, and a preview of at most the
    first 1200 characters.

    Args:
        pdf_path: PDF to hand to the tool.
    """
    from crewai.tools.agent_tools.read_file_tool import ReadFileTool

    preview_limit = 1200

    reader = ReadFileTool()
    reader.set_files({"document": PDFFile(source=str(pdf_path))})
    output = reader._run("document")

    marker_flag = "YES" if "Base64:" in output else "NO"

    print("\n== read_file Fallback ==")
    print(f"Returned {len(output)} chars")
    print(f"Contains Base64 marker: {marker_flag}")
    print("\nPreview:")
    print(output[:preview_limit])
    if len(output) > preview_limit:
        # Signal that the preview was cut short.
        print("...")
|
||||
|
||||
|
||||
def run_crew_kickoff(
    pdf_path: Path,
    model: str,
    api: str | None,
    prompt: str,
    *,
    payload_only: bool = False,
) -> None:
    """Kick off a single-agent crew with the PDF attached as an input file.

    Builds an ``LLM`` (temperature 0, plus ``api`` when given), a
    "PDF Analyst" agent and one task described by ``prompt``, then calls
    ``crew.kickoff`` with the PDF under the ``"document"`` key and prints
    the raw result.

    Args:
        pdf_path: PDF passed to the crew as ``input_files["document"]``.
        model: Model string handed to ``LLM``.
        api: Optional API variant; left out of the ``LLM`` kwargs when falsy.
        prompt: Task description.
        payload_only: When true, ``OpenAICompletion._handle_responses`` is
            patched for the duration of the kickoff so the request
            parameters are printed (sanitized) and a canned string is
            returned instead of running the real handler.

    Raises:
        SystemExit: If ``model`` starts with ``"openai/"``, the
            ``OPENAI_API_KEY`` environment variable is unset, and
            ``payload_only`` is false.
    """
    from crewai import LLM, Agent, Crew, Task

    # The key is only demanded when the unpatched OpenAI path will be used.
    wants_live_openai = model.startswith("openai/") and not payload_only
    if wants_live_openai and not os.getenv("OPENAI_API_KEY"):
        raise SystemExit(
            "OPENAI_API_KEY is not set. Export it before running --mode kickoff."
        )

    llm_options: dict[str, Any] = {"model": model, "temperature": 0}
    if api:
        llm_options.update(api=api)

    analyst = Agent(
        role="PDF Analyst",
        goal="Read the provided PDF and answer accurately from its contents",
        backstory="You inspect uploaded files carefully and avoid guessing.",
        llm=LLM(**llm_options),
        verbose=True,
    )
    summary_task = Task(
        description=prompt,
        expected_output="A concise answer grounded in the uploaded PDF.",
        agent=analyst,
    )
    crew = Crew(agents=[analyst], tasks=[summary_task], verbose=True)

    print("\n== Crew Kickoff ==")
    print(f"Model/API: {model} / {api or 'default'}")
    print(f"PDF: {pdf_path}")

    if not payload_only:
        interceptor = nullcontext()
    else:
        from crewai.llms.providers.openai.completion import OpenAICompletion

        def print_payload_and_stop(
            self: OpenAICompletion,
            params: dict[str, Any],
            *_args: Any,
            **_kwargs: Any,
        ) -> str:
            """Stand-in handler: print the sanitized params and return a stub."""
            print("\n== Sanitized Responses Payload ==")
            print(_sanitize_payload(params))
            return "Payload debug complete."

        # NOTE(review): only OpenAICompletion._handle_responses is replaced;
        # presumably other providers or API variants would bypass this hook
        # and run their normal request path — confirm before relying on
        # payload mode with a non-default --model/--api.
        interceptor = patch.object(
            OpenAICompletion,
            "_handle_responses",
            print_payload_and_stop,
        )

    with interceptor:
        outcome = crew.kickoff(
            input_files={"document": PDFFile(source=str(pdf_path))}
        )

    print("\n== Final Output ==")
    print(outcome.raw)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    The module docstring is used as the help description. It contains
    indented usage examples, so the raw-description formatter is selected
    to keep its line breaks instead of re-wrapping it into one paragraph.

    Returns:
        Namespace with ``mode``, ``pdf``, ``provider``, ``model``, ``api``
        and ``prompt`` attributes.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--mode",
        choices=("inspect", "fallback", "payload", "kickoff", "all"),
        default="inspect",
        help=(
            "What to run. 'inspect', 'fallback', and 'payload' do not call an LLM. "
            "'all' runs 'inspect', 'fallback', and 'payload' in sequence."
        ),
    )
    parser.add_argument(
        "--pdf",
        type=Path,
        default=DEFAULT_PDF,
        help="PDF file to test.",
    )
    parser.add_argument(
        "--provider",
        default="gpt-4o-mini",
        help="Provider/model string for file formatting inspection.",
    )
    parser.add_argument(
        "--model",
        default="openai/gpt-4o-mini",
        help="CrewAI model for real kickoff mode.",
    )
    parser.add_argument(
        "--api",
        default="responses",
        help="API variant. Use '' to omit.",
    )
    parser.add_argument(
        "--prompt",
        default="Summarize the uploaded PDF in 3 bullet points. Do not guess.",
        help="Task prompt for kickoff mode.",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI arguments and run the checks selected by ``--mode``.

    ``inspect`` and ``fallback`` run their respective inspection helpers.
    ``payload`` runs the crew kickoff with ``payload_only=True`` and
    ``kickoff`` runs it with ``payload_only=False``. ``all`` runs inspect,
    fallback and the payload-only kickoff, in that order.

    Raises:
        SystemExit: If the resolved ``--pdf`` path is not an existing file.
    """
    args = parse_args()
    pdf_path = args.pdf.expanduser().resolve()
    # An empty --api string means "omit the api argument".
    api = args.api or None

    # is_file() rather than exists(): a directory would pass an existence
    # check and only fail later, deep inside the file-handling code.
    if not pdf_path.is_file():
        raise SystemExit(f"PDF not found: {pdf_path}")

    if args.mode in ("inspect", "all"):
        inspect_native_path(pdf_path, args.provider, api)
    if args.mode in ("fallback", "all"):
        inspect_fallback_tool(pdf_path)
    if args.mode in ("payload", "kickoff", "all"):
        # Only 'kickoff' runs unpatched; 'payload' and 'all' stop at the
        # printed request payload.
        run_crew_kickoff(
            pdf_path,
            args.model,
            api,
            args.prompt,
            payload_only=args.mode != "kickoff",
        )
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user