crewAI/lib/cli/src/crewai_cli/cli.py

from __future__ import annotations

from importlib.metadata import version as get_version
import os
import subprocess
from typing import Any

import click
from crewai_core.token_manager import TokenManager

from crewai_cli.add_crew_to_flow import add_crew_to_flow
from crewai_cli.authentication.main import AuthenticationCommand
from crewai_cli.config import Settings
from crewai_cli.create_agent import _strip_jsonc, create_agent
from crewai_cli.create_crew import create_crew
from crewai_cli.create_flow import create_flow
from crewai_cli.crew_chat import run_chat
from crewai_cli.deploy.main import DeployCommand
from crewai_cli.enterprise.main import EnterpriseConfigureCommand
from crewai_cli.evaluate_crew import evaluate_crew
from crewai_cli.install_crew import install_crew
from crewai_cli.kickoff_flow import kickoff_flow
from crewai_cli.organization.main import OrganizationCommand
from crewai_cli.plot_flow import plot_flow
from crewai_cli.remote_template.main import TemplateCommand
from crewai_cli.replay_from_task import replay_task_command
from crewai_cli.reset_memories_command import reset_memories_command
from crewai_cli.run_crew import run_crew
from crewai_cli.settings.main import SettingsCommand
from crewai_cli.task_outputs import load_task_outputs
from crewai_cli.tools.main import ToolCommand
from crewai_cli.train_crew import train_crew
from crewai_cli.triggers.main import TriggersCommand
from crewai_cli.update_crew import update_crew
from crewai_cli.user_data import (
    _load_user_data,
    is_tracing_enabled,
    update_user_data,
)
from crewai_cli.utils import (
    build_env_with_all_tool_credentials,
    load_env_vars,
    read_toml,
)


def _get_cli_version() -> str:
    """Resolve the version string reported by the CLI.

    The ``crewai`` distribution is consulted first and ``crewai-cli`` second;
    the first lookup that succeeds wins.

    Returns:
        The installed version, or ``"unknown"`` when neither lookup succeeds.
    """
    for dist_name in ("crewai", "crewai-cli"):
        try:
            return get_version(dist_name)
        except Exception:
            # Lookup failed for this distribution; try the next candidate.
            continue
    return "unknown"


@click.group()
@click.version_option(_get_cli_version())
def crewai() -> None:
    """Top-level command group for crewai."""
    from pathlib import Path

    # Best-effort: seed os.environ with whatever load_env_vars() returns for
    # the current working directory. Variables already present in the process
    # environment are never overwritten, and any failure is ignored so it
    # cannot block the CLI from starting.
    try:
        project_env = load_env_vars(Path.cwd())
        for var_name, var_value in project_env.items():
            os.environ.setdefault(var_name, var_value)
    except Exception:
        pass


@crewai.command(
    name="uv",
    context_settings={"ignore_unknown_options": True},
)
@click.argument("uv_args", nargs=-1, type=click.UNPROCESSED)
def uv(uv_args: tuple[str, ...]) -> None:
    """A wrapper around uv commands that adds custom tool authentication through env vars."""
    # Refuse to run outside a project: read_toml() must succeed first.
    try:
        read_toml()
    except FileNotFoundError as e:
        raise SystemExit(
            "Error. A valid pyproject.toml file is required. Check that a valid pyproject.toml file exists in the current directory."
        ) from e
    except Exception as e:
        raise SystemExit(f"Error: {e}") from e

    # Environment handed to the child process (built by the project helper).
    env = build_env_with_all_tool_credentials()

    try:
        # All arguments are forwarded verbatim; output is not captured so uv
        # writes straight to the user's terminal.
        subprocess.run(  # noqa: S603
            ["uv", *uv_args],  # noqa: S607
            capture_output=False,
            env=env,
            text=True,
            check=True,
        )
    except FileNotFoundError as e:
        # Raised by subprocess when the `uv` executable itself cannot be
        # located; report it instead of surfacing a raw traceback.
        click.secho(
            "Error: the 'uv' executable could not be found. "
            "Install uv and make sure it is on your PATH.",
            fg="red",
        )
        raise SystemExit(1) from e
    except subprocess.CalledProcessError as e:
        # Propagate uv's own exit status to the caller of the CLI.
        click.secho(f"uv command failed with exit code {e.returncode}", fg="red")
        raise SystemExit(e.returncode) from e


@crewai.command()
@click.argument("type", type=click.Choice(["crew", "flow", "agent"]))
@click.argument("name", required=False, default=None)
@click.option("--provider", type=str, help="The provider to use for the crew")
@click.option("--skip_provider", is_flag=True, help="Skip provider validation")
def create(
    type: str, name: str | None, provider: str | None, skip_provider: bool = False
) -> None:
    """Create a new crew, flow, or agent.

    For agents, NAME is optional — omit it to enter interactive mode.
    """
    # NOTE: `type` shadows the builtin, but the parameter name is bound by the
    # Click argument declaration above and therefore has to stay as is.
    if type == "crew":
        if name is None:
            click.secho("Error: name is required for crew creation.", fg="red")
            raise SystemExit(1)
        create_crew(name, provider, skip_provider)
    elif type == "flow":
        if name is None:
            click.secho("Error: name is required for flow creation.", fg="red")
            raise SystemExit(1)
        create_flow(name)
    elif type == "agent":
        # `name` may be None here; it is passed through unchanged.
        create_agent(name)
    else:
        # click.Choice rejects other values on the command line, so this is
        # only reachable when the function is invoked programmatically.
        # Exit non-zero so the failure is visible, like the branches above.
        click.secho(
            "Error: Invalid type. Must be 'crew', 'flow', or 'agent'.", fg="red"
        )
        raise SystemExit(1)


@crewai.command()
@click.option(
    "--tools", is_flag=True, help="Show the installed version of crewai tools"
)
def version(tools: bool) -> None:
    """Show the installed version of crewai."""
    # Fall back to a placeholder when the crewai distribution lookup fails.
    try:
        installed = get_version("crewai")
    except Exception:
        installed = "unknown version"
    click.echo(f"crewai version: {installed}")

    if not tools:
        return

    # --tools: additionally report the crewai-tools distribution, if any.
    try:
        click.echo(f"crewai tools version: {get_version('crewai-tools')}")
    except Exception:
        click.echo("crewai tools not installed")


@crewai.command()
@click.option(
    "-n",
    "--n_iterations",
    type=int,
    default=5,
    help="Number of iterations to run training feedback.",
)
@click.option(
    "-f",
    "--filename",
    type=str,
    default="trained_agents_data.pkl",
    help="Path to a trained-agents pickle (Crew projects only).",
)
def train(n_iterations: int, filename: str) -> None:
    """Train the crew or agents.

    Auto-detects project type: if agents/ directory exists, runs interactive
    NewAgent training (feedback → canonical memories). Otherwise falls back to
    legacy Crew training.
    """
    from pathlib import Path

    from crewai_cli.run_crew import _needs_uv_relaunch, _relaunch_via_uv

    # Agent definitions: every *.json followed by every *.jsonc under agents/,
    # each group in sorted order. Empty when the directory does not exist.
    agents_dir = Path("agents")
    agent_files: list[Path] = []
    if agents_dir.is_dir():
        agent_files = [
            *sorted(agents_dir.glob("*.json")),
            *sorted(agents_dir.glob("*.jsonc")),
        ]

    if not agent_files:
        # No agent definitions found: legacy Crew training path.
        click.echo(f"Training the Crew for {n_iterations} iterations")
        train_crew(n_iterations, filename)
        return

    if _needs_uv_relaunch():
        # Re-issue the same command with the same options through uv.
        _relaunch_via_uv(["train", "-n", str(n_iterations), "-f", filename])
    _train_new_agents(agent_files, n_iterations)


def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None:
    """Run interactive training for NewAgent agents.

    For each agent definition, the matching ``<agent>_cases.json`` file is
    looked up in ``tests/`` (or ``benchmarks/`` when ``tests/`` is absent),
    every case is sent to the agent, the response is shown, and the user is
    prompted for feedback. Non-empty feedback is handed to ``agent.train``.

    Args:
        agent_files: Paths of the agent definition files to train.
        n_iterations: Number of passes over each agent's cases.
    """
    import asyncio
    from pathlib import Path
    import time

    from rich.console import Console

    from crewai_cli.benchmark import load_benchmark_cases

    tests_dir = Path("tests")
    if not tests_dir.is_dir() and Path("benchmarks").is_dir():
        tests_dir = Path("benchmarks")
    agents_trained: set[str] = set()
    console = Console()

    for agent_path in agent_files:
        agent_name = agent_path.stem
        cases_path = tests_dir / f"{agent_name}_cases.json"

        if not cases_path.exists():
            click.secho(f"  Skipping {agent_name} — no {cases_path}", fg="yellow")
            continue

        try:
            loaded = load_benchmark_cases(cases_path)
        except (FileNotFoundError, ValueError) as e:
            click.secho(f"  Error loading cases for {agent_name}: {e}", fg="red")
            continue

        # _test_new_agents reads the case list from the `.cases` attribute of
        # this loader's result. Accept that shape as well as a plain sequence
        # so len()/iteration below work either way.
        # NOTE(review): confirm the exact return type of load_benchmark_cases.
        cases = getattr(loaded, "cases", loaded)

        click.echo()
        click.secho(
            f"Training {agent_name} ({len(cases)} cases, {n_iterations} iterations)",
            fg="cyan",
            bold=True,
        )

        try:
            # Imported inside the try so an import failure is reported per
            # agent, exactly like any other loading error.
            from crewai.new_agent.definition_parser import load_agent_from_definition

            agent = load_agent_from_definition(
                str(agent_path), agents_dir=agent_path.parent
            )
        except Exception as e:
            click.secho(f"  Error loading agent {agent_name}: {e}", fg="red")
            continue

        # Each agent gets a dedicated event loop for its async calls.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            for iteration in range(n_iterations):
                click.secho(
                    f"\n  Iteration {iteration + 1}/{n_iterations}", fg="cyan"
                )
                for ci, case in enumerate(cases):
                    user_input = case.input
                    snippet = user_input[:60] + ("…" if len(user_input) > 60 else "")
                    console.print(f"\n  \\[{ci + 1}/{len(cases)}] {snippet}")

                    try:
                        started = time.monotonic()
                        with console.status("[cyan]  Running…[/]", spinner="dots"):
                            response = loop.run_until_complete(
                                agent.amessage(user_input)
                            )
                        elapsed = time.monotonic() - started
                        console.print(f"  [green]✓[/] done ({elapsed:.1f}s)")
                        click.echo(f"  Response: {response.content[:500]}")
                    except Exception as e:
                        # A failing case is reported and skipped; training
                        # continues with the next case.
                        console.print(f"  [red]✗[/] error: {e}")
                        continue

                    if case.criteria:
                        click.echo(f"  Criteria: {case.criteria}")

                    feedback = click.prompt(
                        "  Feedback (Enter to skip, or type feedback)",
                        default="",
                        show_default=False,
                    )
                    if feedback.strip():
                        agent.train(
                            feedback=feedback.strip(),
                            task_context=f"Input: {user_input}\nResponse: {response.content[:300]}",
                        )
                        click.secho(
                            "  ✓ Feedback saved as canonical memory", fg="green"
                        )
        finally:
            # Always release the loop, including when the prompt is aborted or
            # agent.train raises part-way through.
            loop.close()

        agents_trained.add(agent_name)

    click.echo()
    if not agents_trained:
        click.secho("No agents with matching benchmark cases found.", fg="yellow")
    else:
        click.secho(
            f"Training complete ({len(agents_trained)} agent(s)).",
            fg="green",
            bold=True,
        )


@crewai.command()
@click.option(
    "-t",
    "--task_id",
    type=str,
    help="Replay the crew from this task ID, including all subsequent tasks.",
)
@click.option(
    "-f",
    "--filename",
    "trained_agents_file",
    type=str,
    default=None,
    help=(
        "Path to a trained-agents pickle (produced by `crewai train -f`). "
        "When set, agents load suggestions from this file instead of the "
        "default trained_agents_data.pkl. Equivalent to setting "
        "CREWAI_TRAINED_AGENTS_FILE."
    ),
)
def replay(task_id: str, trained_agents_file: str | None) -> None:
    """Replay the crew execution from a specific task.

    Args:
        task_id: The ID of the task to replay from.
        trained_agents_file: Optional trained-agents pickle path.
    """
    # NOTE(review): --task_id is not declared required, so task_id may be None
    # here when the option is omitted — confirm the callee handles that.
    try:
        click.echo(f"Replaying the crew from task {task_id}")
        replay_task_command(task_id, trained_agents_file=trained_agents_file)
    except Exception as exc:
        # Failures are reported on stderr; the command itself does not raise.
        failure = f"An error occurred while replaying: {exc}"
        click.echo(failure, err=True)


@crewai.command()
def log_tasks_outputs() -> None:
    """Retrieve your latest crew.kickoff() task outputs."""
    try:
        outputs = load_task_outputs()

        if outputs:
            # Entries are numbered starting at 1.
            for position, entry in enumerate(outputs, start=1):
                click.echo(f"Task {position}: {entry['task_id']}")
                # NOTE(review): the label says "Description" but the value
                # printed is the entry's 'expected_output' — confirm intent.
                click.echo(f"Description: {entry['expected_output']}")
                click.echo("------")
        else:
            click.echo(
                "No task outputs found. Only crew kickoff task outputs are logged."
            )

    except Exception as exc:
        # Failures are reported on stderr; the command itself does not raise.
        click.echo(f"An error occurred while logging task outputs: {exc}", err=True)


@crewai.command()
@click.option("-m", "--memory", is_flag=True, help="Reset MEMORY")
@click.option(
    "-l",
    "--long",
    is_flag=True,
    hidden=True,
    help="[Deprecated: use --memory] Reset memory",
)
@click.option(
    "-s",
    "--short",
    is_flag=True,
    hidden=True,
    help="[Deprecated: use --memory] Reset memory",
)
@click.option(
    "-e",
    "--entities",
    is_flag=True,
    hidden=True,
    help="[Deprecated: use --memory] Reset memory",
)
@click.option("-kn", "--knowledge", is_flag=True, help="Reset KNOWLEDGE storage")
@click.option(
    "-akn", "--agent-knowledge", is_flag=True, help="Reset AGENT KNOWLEDGE storage"
)
@click.option(
    "-k", "--kickoff-outputs", is_flag=True, help="Reset LATEST KICKOFF TASK OUTPUTS"
)
@click.option("-a", "--all", is_flag=True, help="Reset ALL memories")
def reset_memories(
    memory: bool,
    long: bool,
    short: bool,
    entities: bool,
    knowledge: bool,
    kickoff_outputs: bool,
    agent_knowledge: bool,
    all: bool,
) -> None:
    """Reset the crew memories (memory, knowledge, agent_knowledge, kickoff_outputs). This will delete all the data saved."""
    try:
        # The hidden legacy flags are all treated as aliases of --memory.
        legacy_flags = {"--long": long, "--short": short, "--entities": entities}
        legacy_used = [flag for flag, enabled in legacy_flags.items() if enabled]
        if legacy_used:
            verb = "is" if len(legacy_used) == 1 else "are"
            click.echo(
                f"Warning: {', '.join(legacy_used)} {verb} "
                "deprecated. Use --memory (-m) instead. All memory is now unified."
            )
            memory = True

        # Nothing selected: explain and leave without resetting anything.
        if not (memory or knowledge or agent_knowledge or kickoff_outputs or all):
            click.echo(
                "Please specify at least one memory type to reset using the appropriate flags."
            )
            return

        reset_memories_command(memory, knowledge, agent_knowledge, kickoff_outputs, all)
    except Exception as exc:
        # Failures are reported on stderr; the command itself does not raise.
        click.echo(f"An error occurred while resetting memories: {exc}", err=True)


@crewai.command()
@click.option(
    "--storage-path",
    type=str,
    default=None,
    help="Path to LanceDB memory directory. If omitted, uses ./.crewai/memory.",
)
@click.option(
    "--embedder-provider",
    type=str,
    default=None,
    help="Embedder provider for recall queries (e.g. openai, google-vertex, cohere, ollama).",
)
@click.option(
    "--embedder-model",
    type=str,
    default=None,
    help="Embedder model name (e.g. text-embedding-3-small, gemini-embedding-001).",
)
@click.option(
    "--embedder-config",
    type=str,
    default=None,
    help='Full embedder config as JSON (e.g. \'{"provider": "cohere", "config": {"model_name": "embed-v4.0"}}\').',
)
def memory(
    storage_path: str | None,
    embedder_provider: str | None,
    embedder_model: str | None,
    embedder_config: str | None,
) -> None:
    """Open the Memory TUI to browse scopes and recall memories."""
    # The TUI module is imported lazily so a failed import becomes a readable
    # message plus a non-zero exit instead of a traceback.
    try:
        from crewai_cli.memory_tui import MemoryTUI
    except ImportError as exc:
        click.echo(
            "Textual is required for the memory TUI but could not be imported. "
            "Try reinstalling crewai or: pip install textual"
        )
        raise SystemExit(1) from exc

    # Embedder spec precedence: a full JSON config wins over the
    # provider/model pair; with neither given, no spec is passed.
    embedder_spec: dict[str, Any] | None = None
    if embedder_config:
        import json

        try:
            embedder_spec = json.loads(embedder_config)
        except json.JSONDecodeError as exc:
            click.echo(f"Invalid --embedder-config JSON: {exc}")
            raise SystemExit(1) from exc
    elif embedder_provider:
        # The model name is only included when one was supplied.
        model_cfg: dict[str, str] = (
            {"model_name": embedder_model} if embedder_model else {}
        )
        embedder_spec = {"provider": embedder_provider, "config": model_cfg}

    MemoryTUI(storage_path=storage_path, embedder_config=embedder_spec).run()


@crewai.command()
@click.option(
    "-n",
    "--n_iterations",
    type=int,
    default=None,
    help="Number of iterations to run. "
    "Defaults to test.iterations in config.json (3 if not set).",
)
@click.option(
    "-m",
    "--model",
    type=str,
    default=None,
    help="LLM model to test with. For NewAgent, defaults to each agent's configured model.",
)
@click.option(
    "-f",
    "--filename",
    "trained_agents_file",
    type=str,
    default=None,
    help="Path to a trained-agents pickle (Crew projects only).",
)
@click.option(
    "--threshold",
    type=float,
    default=None,
    help="Minimum score to pass a test case (NewAgent only, 0.0-1.0). "
    "Defaults to test.threshold in config.json (0.7 if not set).",
)
@click.option(
    "--judge-model",
    type=str,
    default=None,
    help="LLM model for evaluation judging (NewAgent only). "
    "Defaults to test.judge_model in config.json (openai/gpt-4o-mini if not set).",
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help="Show agent execution details (tool calls, LLM responses, errors).",
)
def test(
    n_iterations: int | None,
    model: str | None,
    trained_agents_file: str | None,
    threshold: float | None,
    judge_model: str | None,
    verbose: bool,
) -> None:
    """Test the crew or agents and evaluate the results.

    Auto-detects project type: if agents/ directory exists with .json/.jsonc
    files, runs NewAgent benchmarks. Otherwise falls back to legacy Crew testing.
    """
    from pathlib import Path

    from crewai_cli.run_crew import _needs_uv_relaunch, _relaunch_via_uv

    # Agent definitions: every *.json followed by every *.jsonc under agents/,
    # each group in sorted order. Empty when the directory does not exist.
    agents_dir = Path("agents")
    agent_files: list[Path] = []
    if agents_dir.is_dir():
        agent_files = [
            *sorted(agents_dir.glob("*.json")),
            *sorted(agents_dir.glob("*.jsonc")),
        ]

    if not agent_files:
        # Legacy Crew path: config.json is not consulted here.
        legacy_iterations = 3 if n_iterations is None else n_iterations
        crew_model = model or "gpt-4o-mini"
        click.echo(
            f"Testing the crew for {legacy_iterations} iterations with model {crew_model}"
        )
        evaluate_crew(
            legacy_iterations, crew_model, trained_agents_file=trained_agents_file
        )
        return

    # Resolution order for each setting: CLI flag, then config.json, then the
    # built-in default.
    effective_judge = (
        judge_model or _read_config("test", "judge_model") or "openai/gpt-4o-mini"
    )

    config_iterations = _read_config("test", "iterations")
    if n_iterations is not None:
        effective_iterations = n_iterations
    elif config_iterations is not None:
        effective_iterations = int(config_iterations)
    else:
        effective_iterations = 3

    if _needs_uv_relaunch():
        # Rebuild the command line so the relaunched process sees the same
        # options; optional flags are forwarded only when they were given.
        relaunch_args = [
            "test",
            "-n",
            str(effective_iterations),
            "--judge-model",
            effective_judge,
        ]
        if threshold is not None:
            relaunch_args += ["--threshold", str(threshold)]
        if model:
            relaunch_args += ["-m", model]
        if trained_agents_file:
            relaunch_args += ["-f", trained_agents_file]
        if verbose:
            relaunch_args.append("-v")
        _relaunch_via_uv(relaunch_args)

    # The threshold is read from test.threshold, falling back to the
    # top-level "test_threshold" key when the nested one is absent.
    config_threshold = _read_config("test", "threshold")
    if config_threshold is None:
        config_threshold = _read_config("test_threshold")
    if threshold is not None:
        effective_threshold = threshold
    elif config_threshold is not None:
        effective_threshold = float(config_threshold)
    else:
        effective_threshold = 0.7

    config_timeout = _read_config("test", "case_timeout")
    effective_timeout = 90 if config_timeout is None else int(config_timeout)

    _test_new_agents(
        agent_files,
        effective_iterations,
        model,
        effective_threshold,
        effective_judge,
        verbose=verbose,
        case_timeout=effective_timeout,
    )


def _read_config(*keys: str) -> Any:
    """Look up a nested value in ./config.json.

    The file content is passed through ``_strip_jsonc`` before being parsed
    as JSON. ``_read_config("test", "threshold")`` then yields
    ``config["test"]["threshold"]``.

    Returns:
        The value found at the end of the key path. ``None`` is returned when
        the file does not exist, when reading or parsing fails, when a key on
        the path is missing, or when an intermediate value is not a mapping.
        Falsy values that are present (``0``, ``""``, ``False``) are returned
        as they are.
    """
    import json
    from pathlib import Path

    cfg_file = Path("config.json")
    if not cfg_file.exists():
        return None

    try:
        node = json.loads(_strip_jsonc(cfg_file.read_text(encoding="utf-8")))
        for segment in keys:
            # Stop as soon as the path cannot be followed any further.
            if not isinstance(node, dict) or segment not in node:
                return None
            node = node[segment]
        return node
    except Exception:
        # Unreadable or malformed config is treated like a missing one.
        return None


def _save_run_results(
    results: dict[str, list[Any]] | dict[tuple[str, str], list[Any]],
    *,
    command: str,
    threshold: float | None = None,
    n_iterations: int = 1,
    judge_model: str = "",
    jobs: list[dict[str, Any]] | None = None,
) -> str:
    """Save benchmark/test results to .crewai/runs/<command>_latest.json and return the path."""
    import datetime
    import json
    from pathlib import Path

    runs_dir = Path(".crewai") / "runs"
    runs_dir.mkdir(parents=True, exist_ok=True)

    run_data: dict[str, Any] = {
        "command": command,
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "n_iterations": n_iterations,
        "judge_model": judge_model,
    }
    if threshold is not None:
        run_data["threshold"] = threshold

    agents_data: dict[str, Any] = {}
    for key, result_list in results.items():
        if isinstance(key, tuple):
            agent_name, model_key = key
            section_key = f"{agent_name}/{model_key}"
        else:
            section_key = key

        cases: list[dict[str, Any]] = []
        for r in result_list:
            effective_passed = (
                r.score >= threshold if threshold is not None else r.passed
            )
            case: dict[str, Any] = {
                "case": r.case_index + 1,
                "input": r.input,
                "output": r.actual,
                "score": r.score,
                "passed": effective_passed,
                "time_ms": r.response_time_ms,
                "input_tokens": r.input_tokens,
                "output_tokens": r.output_tokens,
            }
            if r.expected:
                case["expected"] = r.expected
            if r.cost is not None:
                case["cost"] = r.cost
            cases.append(case)

        total = len(cases)
        passed = sum(1 for c in cases if c["passed"])
        avg_score = sum(c["score"] for c in cases) / total if total else 0.0
        agents_data[section_key] = {
            "passed": passed,
            "total": total,
            "avg_score": round(avg_score, 4),
            "cases": cases,
        }

    run_data["results"] = agents_data

    out_path = runs_dir / f"{command}_latest.json"
    out_path.write_text(json.dumps(run_data, indent=2, ensure_ascii=False) + "\n")
    return str(out_path)


class _BenchmarkLiveProgress:
    """Rich-based live table that tracks benchmark progress per model.

    State is kept in ``_state``, keyed by the ``model`` value of incoming
    progress events; each entry holds the counters rendered as one table row.
    """

    def __init__(self, console: Any = None, n_iterations: int = 1) -> None:
        """Set up an idle display.

        Args:
            console: Rich console to draw on; a new one is created when falsy.
            n_iterations: Total iteration count shown in the banner line.
        """
        from rich.console import Console

        self._console = console or Console()
        self._n_iterations = n_iterations
        self._current_iteration = 0
        self._state: dict[str, dict[str, Any]] = {}
        self._live: Any = None

    def start(self, iteration: int = 0) -> None:
        """Clear previous rows and open the live display for ``iteration``."""
        from rich.live import Live

        self._current_iteration = iteration
        self._state.clear()
        live = Live(
            self._render(),
            console=self._console,
            refresh_per_second=10,
            transient=True,
        )
        self._live = live
        live.start()

    def stop(self) -> None:
        """Push one last render and close the live display, if it is open."""
        if not self._live:
            return
        self._live.update(self._render())
        self._live.stop()
        self._live = None

    def on_progress(self, event: dict[str, Any]) -> None:
        """Fold one progress event into the per-model state and redraw.

        Recognised ``event["type"]`` values are ``model_start``,
        ``case_start``, ``judging``, ``case_done`` and ``model_done``; any
        other type leaves the state untouched. Every type except
        ``model_start`` expects the model's row to exist already.
        """
        kind = event["type"]
        name = event.get("model", "")

        if kind == "model_start":
            # Fresh row for this model with zeroed counters.
            self._state[name] = {
                "done": 0,
                "total": event["total_cases"],
                "status": "starting",
                "passed": 0,
                "avg": 0.0,
                "time": 0.0,
                "in_tokens": 0,
                "out_tokens": 0,
                "cost": None,
            }
        elif kind in ("case_start", "judging"):
            self._state[name]["status"] = (
                "running" if kind == "case_start" else "judging"
            )
        elif kind == "case_done":
            row = self._state[name]
            row["done"] = row.get("done", 0) + 1
            if event.get("passed"):
                row["passed"] += 1
            row["status"] = "running"
        elif kind == "model_done":
            # Final figures from the event replace the running counters
            # where the event provides them.
            row = self._state[name]
            row["status"] = "done"
            row["passed"] = event.get("passed", row["passed"])
            row["done"] = event.get("total", row["done"])
            row["avg"] = event["avg_score"]
            row["time"] = event.get("total_time", 0.0)
            row["in_tokens"] = event.get("input_tokens", 0)
            row["out_tokens"] = event.get("output_tokens", 0)
            row["cost"] = event.get("total_cost")

        if self._live:
            self._live.update(self._render())

    def _render(self) -> Any:
        """Build the renderable for the current state.

        Returns the table alone, or a group of an iteration banner followed
        by the table when more than one iteration is configured.
        """
        from rich import box
        from rich.console import Group
        from rich.spinner import Spinner
        from rich.table import Table
        from rich.text import Text

        from crewai_cli.benchmark import _fmt_cost, _fmt_tokens, _score_color

        # The cost column only appears once a finished model reports a cost.
        finished_rows = [
            row for row in self._state.values() if row["status"] == "done"
        ]
        has_cost = any(row.get("cost") is not None for row in finished_rows)
        n_cols = 7 if has_cost else 6

        table = Table(box=box.SIMPLE, show_header=False, padding=(0, 1), expand=False)
        table.add_column("", width=1)  # icon
        table.add_column("", no_wrap=True)  # model
        # Remaining columns: passed-or-bar, score, time, tokens (and cost).
        for _ in range(n_cols - 2):
            table.add_column("", no_wrap=True, justify="right")

        for name, row in self._state.items():
            cells: list[Any]
            if row["status"] != "done":
                # In-flight row: spinner, model, progress bar, then blanks.
                bar_w = 10
                pct = row["done"] / row["total"] if row["total"] > 0 else 0
                filled = round(pct * bar_w)
                progress = Text.from_markup(
                    f"[cyan]{'█' * filled}{'░' * (bar_w - filled)}[/cyan] {row['done']}/{row['total']}"
                )
                padding = [Text("")] * (n_cols - 3)
                cells = [Spinner("dots", style="cyan"), name, progress] + padding
            else:
                # Finished row: check mark plus the final figures.
                color = _score_color(row["avg"])
                cells = [
                    Text("✓", style="green"),
                    name,
                    Text.from_markup(
                        f"[{color}]{row['passed']}/{row['total']}[/{color}]"
                    ),
                    Text.from_markup(f"[{color}]{row['avg']:.2f}[/{color}]"),
                    Text(f"{row['time']:.1f}s", style="dim"),
                    Text(
                        f"↑{_fmt_tokens(row['in_tokens'])} ↓{_fmt_tokens(row['out_tokens'])}",
                        style="dim",
                    ),
                ]
                if has_cost:
                    cost = row["cost"]
                    cells.append(
                        Text(_fmt_cost(cost), style="dim")
                        if cost is not None
                        else Text("")
                    )

            table.add_row(*cells)

        if self._n_iterations > 1:
            banner = Text(
                f"  Iteration {self._current_iteration + 1}/{self._n_iterations}",
                style="cyan",
            )
            return Group(banner, table)
        return table


def _test_new_agents(
    agent_files: list[Any],
    n_iterations: int,
    model: str | None,
    threshold: float,
    judge_model: str,
    verbose: bool = False,
    case_timeout: int = 90,
) -> None:
    """Run NewAgent test cases with pass/fail threshold (all agents in parallel).

    For every agent file, the matching ``<agent stem>_cases.json`` is looked
    up in ``tests/`` (or ``benchmarks/`` when ``tests/`` is not a directory
    and ``benchmarks/`` is). Agents without a cases file, or whose cases fail
    to load, are skipped with a message. The remaining agents are benchmarked
    concurrently, once per iteration; the accumulated results are summarised,
    saved through ``_save_run_results`` and turned into an exit status.

    Args:
        agent_files: Agent definition paths; each must provide ``.stem`` and
            ``.resolve()`` (e.g. ``pathlib.Path``).
        n_iterations: Number of times the whole suite is run.
        model: Optional single model to run against; ``None`` passes
            ``models=None`` to ``run_benchmark``.
        threshold: Minimum passing score, used unless the loaded cases file
            carries its own threshold.
        judge_model: Judge model identifier forwarded to ``run_benchmark``.
        verbose: When true, the live progress display is disabled and
            ``VerboseBenchmarkOutput`` replaces ``SuppressBenchmarkOutput``.
        case_timeout: Forwarded to ``run_benchmark`` as ``case_timeout``.

    Raises:
        SystemExit: With code 1 when no agent has matching cases, when no
            agent completes successfully, or when any result scores below
            its threshold or any agent run raises.
    """
    import asyncio
    from pathlib import Path

    from rich.console import Console as _RichConsole

    from crewai_cli.benchmark import (
        load_benchmark_cases,
        run_benchmark,
    )

    _con = _RichConsole()
    tests_dir = Path("tests")
    if not tests_dir.is_dir() and Path("benchmarks").is_dir():
        tests_dir = Path("benchmarks")

    # Collect valid agents + cases
    jobs: list[dict[str, Any]] = []
    for agent_path in agent_files:
        agent_name = agent_path.stem
        cases_path = tests_dir / f"{agent_name}_cases.json"

        if not cases_path.exists():
            click.secho(f"  Skipping {agent_name} — no {cases_path} found", fg="yellow")
            continue

        try:
            loaded = load_benchmark_cases(cases_path)
        except (FileNotFoundError, ValueError) as e:
            click.secho(f"  Error loading cases for {agent_name}: {e}", fg="red")
            continue

        # A threshold declared in the cases file wins over the CLI value.
        file_threshold = loaded.threshold if loaded.threshold is not None else threshold
        jobs.append(
            {
                "agent_name": agent_name,
                "agent_path": str(agent_path.resolve()),
                "cases": loaded.cases,
                "threshold": file_threshold,
            }
        )

    if not jobs:
        click.secho("No agents with matching benchmark cases found.", fg="yellow")
        raise SystemExit(1)

    model_list = [model] if model else None

    # Progress display — prefix model key with agent name
    progress = (
        None
        if verbose
        else _BenchmarkLiveProgress(console=_con, n_iterations=n_iterations)
    )

    def _make_progress_cb(agent_name: str) -> Any:
        def _cb(event: dict[str, Any]) -> None:
            if progress is not None:
                # Copy so the caller's event dict is never mutated.
                prefixed = dict(event)
                if "model" in prefixed:
                    prefixed["model"] = f"{agent_name}/{prefixed['model']}"
                progress.on_progress(prefixed)

        return _cb

    async def _run_all() -> Any:
        tasks = []
        for job in jobs:
            tasks.append(
                run_benchmark(
                    agent_def=job["agent_path"],
                    cases=job["cases"],
                    models=model_list,
                    judge_model=judge_model,
                    on_progress=None
                    if verbose
                    else _make_progress_cb(job["agent_name"]),
                    verbose=verbose,
                    case_timeout=case_timeout,
                )
            )
        # return_exceptions=True: one failing agent must not abort the others.
        return await asyncio.gather(*tasks, return_exceptions=True)

    case_count = sum(len(j["cases"]) for j in jobs)
    click.echo()
    click.secho(
        f"Testing {len(jobs)} agent(s), {case_count} cases, {n_iterations} iteration(s) (threshold={threshold})",
        fg="cyan",
        bold=True,
    )

    from crewai_cli.benchmark import (
        ArtifactsSandbox,
        SuppressBenchmarkOutput,
        VerboseBenchmarkOutput,
        _fmt_cost,
        _fmt_tokens,
        _score_color,
    )

    all_passed = True
    agents_tested: set[str] = set()
    # Accumulate results across iterations: (agent_name, model_key) → [BenchmarkResult, ...]
    agg_results: dict[tuple[str, str], list[Any]] = {}
    agg_jobs: dict[tuple[str, str], dict[str, Any]] = {}

    _loop = asyncio.new_event_loop()
    asyncio.set_event_loop(_loop)

    iter_marks: list[str] = []

    try:
        for iteration in range(n_iterations):
            try:
                if not verbose:
                    if progress is None:
                        raise RuntimeError(
                            "progress must not be None in non-verbose mode"
                        )
                    progress.start(iteration=iteration)
                output_ctx = (
                    VerboseBenchmarkOutput() if verbose else SuppressBenchmarkOutput()
                )
                with ArtifactsSandbox(), output_ctx:
                    all_results = _loop.run_until_complete(_run_all())
            finally:
                if not verbose:
                    if progress is None:
                        raise RuntimeError(
                            "progress must not be None in non-verbose mode"
                        )
                    progress.stop()

            iter_ok = True
            for job, result in zip(jobs, all_results):
                # gather(return_exceptions=True) may also hand back
                # BaseException subclasses such as asyncio.CancelledError.
                if isinstance(result, BaseException):
                    iter_ok = False
                    all_passed = False
                    # Surface the failure instead of dropping it silently.
                    click.secho(
                        f"  Error running {job['agent_name']}: "
                        f"{type(result).__name__}: {result}",
                        fg="red",
                    )
                    continue
                agents_tested.add(job["agent_name"])
                for model_key, results in result.items():
                    key = (job["agent_name"], model_key)
                    agg_results.setdefault(key, []).extend(results)
                    agg_jobs[key] = job
                    if any(r.score < job["threshold"] for r in results):
                        iter_ok = False

            iter_marks.append("[green]✓[/green]" if iter_ok else "[red]✗[/red]")
    finally:
        # Always release the loop, even when an iteration raised.
        _loop.close()

    if n_iterations > 1:
        _con.print(f"  Iterations: {' '.join(iter_marks)}")

    click.echo()

    # Compute averaged stats per agent/model, then print column-aligned
    n_iter = max(n_iterations, 1)
    rows: list[dict[str, Any]] = []
    for key in agg_results:
        agent_name, model_key = key
        job = agg_jobs[key]
        results = agg_results[key]
        total = len(results)
        passed_count = sum(1 for r in results if r.score >= job["threshold"])
        cases_per_iter = total // n_iter if n_iter else total
        pass_per_iter = passed_count // n_iter if n_iter else passed_count
        avg_score = sum(r.score for r in results) / total if total else 0.0
        avg_time = sum(r.response_time_ms for r in results) / 1000 / n_iter
        avg_cost = sum(r.cost or 0.0 for r in results) / n_iter
        avg_in_tokens = int(sum(r.input_tokens for r in results) / n_iter)
        avg_out_tokens = int(sum(r.output_tokens for r in results) / n_iter)

        rows.append(
            {
                "label": f"{agent_name}/{model_key}",
                "passed": passed_count == total,
                "ratio": f"{pass_per_iter}/{cases_per_iter}",
                "score": avg_score,
                "time": f"{avg_time:.1f}s",
                "tokens": f"↑{_fmt_tokens(avg_in_tokens)} ↓{_fmt_tokens(avg_out_tokens)}",
                "cost": _fmt_cost(avg_cost) if avg_cost > 0 else "",
            }
        )

    w_label = max((len(r["label"]) for r in rows), default=0)
    w_ratio = max((len(r["ratio"]) for r in rows), default=0)
    w_time = max((len(r["time"]) for r in rows), default=0)
    w_tokens = max((len(r["tokens"]) for r in rows), default=0)
    has_cost = any(r["cost"] for r in rows)

    for r in rows:
        color = _score_color(r["score"])
        icon = "[green]✓[/green]" if r["passed"] else "[red]✗[/red]"
        line = (
            f"  {icon}   {r['label']:<{w_label}}"
            f"   [{color}]{r['ratio']:>{w_ratio}}[/{color}]"
            f"   [{color}]{r['score']:.2f}[/{color}]"
            f"   [dim]{r['time']:>{w_time}}[/dim]"
            f"   [dim]{r['tokens']:>{w_tokens}}[/dim]"
        )
        if has_cost:
            line += f"   [dim]{r['cost']:>6}[/dim]"
        _con.print(line)

    click.echo()

    # Pass/fail summary per agent (report per-iteration case counts)
    for key in agg_results:
        agent_name, model_key = key
        job = agg_jobs[key]
        results = agg_results[key]
        cases_per_iter = len(results) // n_iter if n_iter else len(results)
        failed = [r for r in results if r.score < job["threshold"]]
        if failed:
            all_passed = False
            unique_failed = len({r.case_index for r in failed})
            _con.print(
                f"  [red bold]{agent_name}: FAILED {unique_failed}/{cases_per_iter} "
                f"cases below {job['threshold']}[/red bold]"
            )
            # Print each failing case once, with its average failing score.
            seen: set[int] = set()
            for r in failed:
                if r.case_index in seen:
                    continue
                seen.add(r.case_index)
                inp = r.input[:50] + ("…" if len(r.input) > 50 else "")
                scores = [f.score for f in failed if f.case_index == r.case_index]
                avg = sum(scores) / len(scores)
                _con.print(
                    f"    [red]#{r.case_index + 1}[/red] [dim]{inp}[/dim]  [red]{avg:.2f}[/red]"
                )
        else:
            _con.print(
                f"  [green]{agent_name}: PASSED all {cases_per_iter} cases >= {job['threshold']}[/green]"
            )

    # Save detailed results to disk
    saved = _save_run_results(
        agg_results,
        command="test",
        threshold=threshold,
        n_iterations=n_iterations,
        judge_model=judge_model,
    )
    _con.print(f"  [dim]Results saved to {saved}[/dim]")

    click.echo()
    if len(agents_tested) == 0:
        click.secho("No agents completed successfully.", fg="yellow")
        raise SystemExit(1)
    if all_passed:
        click.secho(
            f"All tests passed ({len(agents_tested)} agent(s)).", fg="green", bold=True
        )
    else:
        click.secho("Some tests failed.", fg="red", bold=True)
        raise SystemExit(1)


@crewai.command(
    context_settings=dict(
        ignore_unknown_options=True,
        allow_extra_args=True,
    )
)
@click.pass_context
def install(context: click.Context) -> None:
    """Install the Crew."""
    # The context settings let unknown options and extra arguments through;
    # they are collected on the context and handed to install_crew untouched.
    passthrough_args = context.args
    install_crew(passthrough_args)


@crewai.command()
@click.option(
    "-f",
    "--filename",
    "trained_agents_file",
    default=None,
    type=str,
    help=(
        "Path to a trained-agents pickle (produced by `crewai train -f`). "
        "When set, agents load suggestions from this file instead of the "
        "default trained_agents_data.pkl. Equivalent to setting "
        "CREWAI_TRAINED_AGENTS_FILE."
    ),
)
def run(trained_agents_file: str | None) -> None:
    """Run the Crew."""
    # The -f/--filename value (or None when omitted) is forwarded by keyword.
    selected_file = trained_agents_file
    run_crew(trained_agents_file=selected_file)


@crewai.command()
def update() -> None:
    """Update the pyproject.toml of the Crew project to use uv."""
    # Takes no options; all work is delegated to update_crew().
    update_crew()


@crewai.command()
def login() -> None:
    """Sign Up/Login to CrewAI AMP."""
    # Clear the stored user settings first, then start the login flow.
    stored_settings = Settings()
    stored_settings.clear_user_settings()

    auth_command = AuthenticationCommand()
    auth_command.login()


@crewai.command()
@click.option(
    "--reset", is_flag=True, help="Also reset all CLI configuration to defaults"
)
def logout(reset: bool) -> None:
    """Logout from CrewAI AMP."""
    cli_settings = Settings()

    if not reset:
        # Plain logout: clear the tokens and the user settings only.
        TokenManager().clear_tokens()
        cli_settings.clear_user_settings()
        click.echo("Successfully logged out from CrewAI AMP.")
        return

    # --reset: reset the settings object as a whole.
    cli_settings.reset()
    click.echo("Successfully logged out and reset all CLI configuration.")


# DEPLOY CREWAI+ COMMANDS
@crewai.group()
def deploy() -> None:
    """Deploy the Crew CLI group."""
    # Intentionally empty: this callback only anchors the `deploy` group;
    # its subcommands are attached with @deploy.command(...).


@deploy.command("create")
@click.option("-y", "--yes", is_flag=True, help="Skip the confirmation prompt")
@click.option(
    "--skip-validate",
    is_flag=True,
    help="Skip the pre-deploy validation checks.",
)
def deploy_create(yes: bool, skip_validate: bool) -> None:
    """Create a Crew deployment."""
    # `yes` is passed positionally and `skip_validate` by keyword.
    DeployCommand().create_crew(yes, skip_validate=skip_validate)


@deploy.command("list")
def deploy_list() -> None:
    """List all deployments."""
    # Delegates to DeployCommand.list_crews on a fresh command object.
    DeployCommand().list_crews()


@deploy.command("push")
@click.option("-u", "--uuid", type=str, help="Crew UUID parameter")
@click.option(
    "--skip-validate",
    is_flag=True,
    help="Skip the pre-deploy validation checks.",
)
def deploy_push(uuid: str | None, skip_validate: bool) -> None:
    """Deploy the Crew."""
    # `uuid` is None when -u/--uuid is omitted; both values go by keyword.
    DeployCommand().deploy(uuid=uuid, skip_validate=skip_validate)


@deploy.command("validate")
def deploy_validate() -> None:
    """Validate the current project against common deployment failures.

    Runs the same pre-deploy checks that `crewai deploy create` and
    `crewai deploy push` run automatically, without contacting the platform.
    Exits non-zero if any blocking issues are found.
    """
    # Imported lazily, inside the command, rather than at module import time.
    from crewai_cli.deploy.validate import run_validate_command as _run_checks

    _run_checks()


@deploy.command("status")
@click.option("-u", "--uuid", type=str, help="Crew UUID parameter")
def deply_status(uuid: str | None) -> None:
    """Get the status of a deployment."""
    # NOTE: the function name is misspelled ("deply"); the CLI name comes
    # from the decorator, so the command is still exposed as `status`.
    DeployCommand().get_crew_status(uuid=uuid)


@deploy.command("logs")
@click.option("-u", "--uuid", type=str, help="Crew UUID parameter")
def deploy_logs(uuid: str | None) -> None:
    """Get the logs of a deployment."""
    # `uuid` is None when -u/--uuid is omitted.
    DeployCommand().get_crew_logs(uuid=uuid)


@deploy.command("remove")
@click.option("-u", "--uuid", type=str, help="Crew UUID parameter")
def deploy_remove(uuid: str | None) -> None:
    """Remove a deployment."""
    # `uuid` is None when -u/--uuid is omitted.
    DeployCommand().remove_crew(uuid=uuid)


@crewai.group()
def tool() -> None:
    """Tool Repository related commands."""
    # Intentionally empty: subcommands are attached with @tool.command(...).


@tool.command("create")
@click.argument("handle")
def tool_create(handle: str) -> None:
    # `crewai tool create HANDLE`: hands the handle to ToolCommand.create.
    ToolCommand().create(handle)


@tool.command("install")
@click.argument("handle")
def tool_install(handle: str) -> None:
    # `crewai tool install HANDLE`: login() runs before install() on the
    # same ToolCommand instance.
    tools = ToolCommand()
    tools.login()
    tools.install(handle)


@tool.command("publish")
@click.option(
    "--force",
    is_flag=True,
    show_default=True,
    default=False,
    help="Bypasses Git remote validations",
)
@click.option("--public", "is_public", flag_value=True, default=False)
@click.option("--private", "is_public", flag_value=False)
def tool_publish(is_public: bool, force: bool) -> None:
    # --public / --private both feed the single `is_public` parameter.
    # login() runs before publish() on the same ToolCommand instance.
    tools = ToolCommand()
    tools.login()
    tools.publish(is_public, force)


@crewai.group()
def template() -> None:
    """Browse and install project templates."""
    # Intentionally empty: subcommands are attached with @template.command(...).


@template.command("list")
def template_list() -> None:
    """List available templates and select one to install."""
    # Delegates to TemplateCommand.list_templates.
    TemplateCommand().list_templates()


@template.command("add")
@click.argument("name")
@click.option(
    "-o",
    "--output-dir",
    default=None,
    type=str,
    help="Directory name for the template (defaults to template name)",
)
def template_add(name: str, output_dir: str | None) -> None:
    """Add a template to the current directory."""
    # `output_dir` is None when -o/--output-dir is omitted.
    TemplateCommand().add_template(name, output_dir)


@crewai.group()
def flow() -> None:
    """Flow related commands."""
    # Intentionally empty: subcommands are attached with @flow.command(...).


@flow.command("kickoff")
def flow_run() -> None:
    """Kickoff the Flow."""
    # Announce first, then delegate to kickoff_flow().
    banner = "Running the Flow"
    click.echo(banner)
    kickoff_flow()


@flow.command("plot")
def flow_plot() -> None:
    """Plot the Flow."""
    # Announce first, then delegate to plot_flow().
    banner = "Plotting the Flow"
    click.echo(banner)
    plot_flow()


@flow.command("add-crew")
@click.argument("crew_name")
def flow_add_crew(crew_name: str) -> None:
    """Add a crew to an existing flow."""
    # Announce first, then delegate to add_crew_to_flow().
    banner = f"Adding crew {crew_name} to the flow"
    click.echo(banner)
    add_crew_to_flow(crew_name)


@crewai.group()
def agent() -> None:
    """Agent management commands."""
    # Intentionally empty: subcommands are attached with @agent.command(...).


@agent.command("reset-history")
@click.argument("name")
@click.option(
    "--keep-provenance",
    is_flag=True,
    help="Keep the provenance (decision audit trail) when clearing history.",
)
def agent_reset_history(name: str, keep_provenance: bool) -> None:
    """Clear conversation history for the named agent."""
    from pathlib import Path

    base_dir = Path.cwd() / ".crewai" / "conversations"

    # (file, label used in the summary message, whether deletion is allowed)
    targets = (
        (base_dir / f"{name}.json", "conversation history", True),
        (base_dir / f"{name}_provenance.json", "provenance log", not keep_provenance),
    )

    removed: list[str] = []
    for target, label, allowed in targets:
        # A file that is not allowed to be deleted is never even probed.
        if allowed and target.exists():
            target.unlink()
            removed.append(label)

    if not removed:
        click.secho(
            f"No conversation history found for agent '{name}'.",
            fg="yellow",
        )
        return

    summary = " and ".join(removed)
    click.secho(
        f"Cleared {summary} for agent '{name}'.",
        fg="green",
    )


@agent.command(name="memory")
@click.argument("name")
@click.option("--search", "-s", default=None, help="Search memories by keyword")
@click.option("--clear", is_flag=True, help="Clear all memories")
@click.option("--limit", "-n", "limit_", default=10, help="Number of memories to show")
def agent_memory(name: str, search: str | None, clear: bool, limit_: int) -> None:
    """Inspect or manage agent memories."""
    # Flow: locate agents/<name>.json(c) -> load the agent -> either clear its
    # memory (--clear, after confirmation) or list/search records and print
    # them as a Rich table (plain text when rich cannot be imported).
    from pathlib import Path

    agents_dir = Path.cwd() / "agents"
    agent_path = None
    # .json takes precedence over .jsonc when both exist.
    for ext in (".json", ".jsonc"):
        p = agents_dir / f"{name}{ext}"
        if p.exists():
            agent_path = p
            break

    if not agent_path:
        click.echo(f"Agent '{name}' not found in agents/ directory.")
        return

    try:
        from crewai.new_agent.definition_parser import load_agent_from_definition

        agent_instance = load_agent_from_definition(agent_path, agents_dir)
    except Exception as e:
        click.echo(f"Failed to load agent '{name}': {e}")
        return

    if agent_instance is None:
        click.echo(f"Could not create agent '{name}'.")
        return

    if clear:
        if click.confirm(f"Clear all memories for '{name}'?"):
            if (
                hasattr(agent_instance, "_memory_instance")
                and agent_instance._memory_instance
            ):
                try:
                    agent_instance._memory_instance.reset()
                    click.echo(f"Memories cleared for '{name}'.")
                except Exception as e:
                    click.echo(f"Failed to clear memories: {e}")
            else:
                click.echo(f"No memory configured for '{name}'.")
        # --clear never falls through to the listing below.
        return

    if (
        not hasattr(agent_instance, "_memory_instance")
        or not agent_instance._memory_instance
    ):
        click.echo(f"No memory configured for '{name}'.")
        return

    # GAP-93: Rich formatted output for agent memory inspection
    try:
        from rich.console import Console
        from rich.table import Table
        from rich.text import Text
    except ImportError:
        # Fall back to plain text if rich is not available
        Console = None  # type: ignore[assignment,misc]

    try:
        if search:
            results = agent_instance._memory_instance.recall(
                search, limit=limit_, depth="shallow"
            )
        else:
            results = agent_instance._memory_instance.list_records(limit=limit_)

        if not results:
            msg = (
                f"No memories matching '{search}'"
                if search
                else f"No memories stored for '{name}'."
            )
            click.echo(msg)
            return

        if Console is not None:
            console = Console()
            title = (
                f"Memories matching '{search}' — {name}"
                if search
                else f"Memories — {name}"
            )
            table = Table(title=title, show_lines=True)
            table.add_column("#", style="dim", width=4)
            table.add_column("Content", min_width=40)
            table.add_column("Type", width=10)
            table.add_column("Scope", width=10)

            def _plain_cell(value: Any) -> Text:
                # Stored values are arbitrary data: wrapping them in Text
                # keeps Rich from parsing "[...]" as console markup, and
                # str() makes non-string values renderable. None stays an
                # empty cell.
                return Text("" if value is None else str(value))

            for i, mem in enumerate(results, 1):
                # Search hits may wrap the record; plain listings are the record.
                record = getattr(mem, "record", mem)
                content = str(getattr(record, "content", "") or mem)
                if len(content) > 200:
                    content = content[:200] + "..."
                meta = getattr(record, "metadata", {}) or {}
                mem_type = meta.get("type", "raw")
                scope = getattr(record, "scope", meta.get("scope", "—"))
                table.add_row(
                    str(i),
                    Text(content),
                    _plain_cell(mem_type),
                    _plain_cell(scope),
                )

            console.print(table)
        else:
            heading = (
                f"Memories matching '{search}':"
                if search
                else f"Recent memories for '{name}':"
            )
            click.echo(heading)
            for i, r in enumerate(results, 1):
                click.echo(f"  {i}. {str(r)[:100]}")
    except Exception as e:
        # Best-effort inspection command: report instead of crashing.
        click.echo(f"Memory operation failed: {e}")


@crewai.group()
def triggers() -> None:
    """Trigger related commands. Use 'crewai triggers list' to see available triggers, or 'crewai triggers run app_slug/trigger_slug' to execute."""
    # Intentionally empty: subcommands are attached with @triggers.command(...).


@triggers.command("list")
def triggers_list() -> None:
    """List all available triggers from integrations."""
    # Delegates to TriggersCommand.list_triggers.
    TriggersCommand().list_triggers()


@triggers.command("run")
@click.argument("trigger_path")
def triggers_run(trigger_path: str) -> None:
    """Execute crew with trigger payload. Format: app_slug/trigger_slug"""
    # The raw argument is passed through; it is not parsed here.
    TriggersCommand().execute_with_trigger(trigger_path)


@crewai.command()
def chat() -> None:
    """Start a conversation with the Crew, collecting user-supplied inputs,
    and using the Chat LLM to generate responses.
    """
    # Print the banner before handing control to run_chat().
    banner = "\nStarting a conversation with the Crew\nType 'exit' or Ctrl+C to quit.\n"
    click.secho(banner)
    run_chat()


@crewai.group(invoke_without_command=True)
def org() -> None:
    """Organization management commands."""
    # invoke_without_command=True makes this callback run for a bare
    # `crewai org`; the callback has no body, so nothing happens in that case.
    # NOTE(review): the help text of the `current` subcommand suggests bare
    # `crewai org` was meant to show the current organization — confirm.


@org.command(name="list")
def org_list() -> None:
    """List available organizations."""
    # Delegates to OrganizationCommand.list.
    OrganizationCommand().list()


@org.command()
@click.argument("id")
def switch(id: str) -> None:
    """Switch to a specific organization."""
    # The parameter must stay named `id`: it is bound to the click argument.
    OrganizationCommand().switch(id)


@org.command()
def current() -> None:
    """Show current organization when 'crewai org' is called without subcommands."""
    # Delegates to OrganizationCommand.current.
    OrganizationCommand().current()


@crewai.group()
def enterprise() -> None:
    """Enterprise Configuration commands."""
    # Intentionally empty: subcommands are attached with @enterprise.command(...).


@enterprise.command(name="configure")
@click.argument("enterprise_url")
def enterprise_configure(enterprise_url: str) -> None:
    """Configure CrewAI AMP OAuth2 settings from the provided Enterprise URL."""
    # The URL is passed through as given; no validation happens here.
    EnterpriseConfigureCommand().configure(enterprise_url)


@crewai.group()
def config() -> None:
    """CLI Configuration commands."""
    # Intentionally empty: subcommands are attached with @config.command(...).


@config.command(name="list")
def config_list() -> None:
    """List all CLI configuration parameters."""
    # Delegates to SettingsCommand.list.
    SettingsCommand().list()


@config.command(name="set")
@click.argument("key")
@click.argument("value")
def config_set(key: str, value: str) -> None:
    """Set a CLI configuration parameter."""
    # Both arguments arrive as strings and are passed through unchanged.
    SettingsCommand().set(key, value)


@config.command(name="reset")
def config_reset() -> None:
    """Reset all CLI configuration parameters to default values."""
    # Delegates to SettingsCommand.reset_all_settings.
    SettingsCommand().reset_all_settings()


@crewai.group()
def env() -> None:
    """Environment variable commands."""
    # Intentionally empty: subcommands are attached with @env.command(...).


@env.command(name="view")
def env_view() -> None:
    """View tracing-related environment variables."""
    from pathlib import Path

    from rich.console import Console
    from rich.panel import Panel
    from rich.table import Table

    console = Console()

    # A .env file is looked up relative to the current working directory.
    dotenv_path = Path(".env")
    has_dotenv = dotenv_path.exists()

    table = Table(show_header=True, header_style="bold cyan", expand=True)
    for heading, colour, col_width in (
        ("Environment Variable", "cyan", 30),
        ("Value", "white", 20),
        ("Source", "yellow", 20),
    ):
        table.add_column(heading, style=colour, width=col_width)

    # CREWAI_TRACING_ENABLED always gets a row, even when it is unset.
    tracing_value = os.getenv("CREWAI_TRACING_ENABLED", "")
    if not tracing_value:
        table.add_row(
            "CREWAI_TRACING_ENABLED",
            "[dim]Not set[/dim]",
            "[dim]—[/dim]",
        )
    else:
        table.add_row(
            "CREWAI_TRACING_ENABLED",
            tracing_value,
            "Environment/Shell",
        )

    # The remaining variables are listed only when they have a value.
    for var_name in ("CREWAI_TESTING", "CREWAI_USER_ID", "CREWAI_ORG_ID"):
        var_value = os.getenv(var_name, "")
        if var_value:
            table.add_row(var_name, var_value, "Environment/Shell")

    # Final row: whether a .env file exists, and its resolved path if so.
    if has_dotenv:
        table.add_row(".env file", "✅ Found", str(dotenv_path.resolve()))
    else:
        table.add_row(".env file", "❌ Not found", "N/A")

    console.print("\n")
    console.print(
        Panel(
            table,
            title="Tracing Environment Variables",
            border_style="blue",
            padding=(1, 2),
        )
    )

    # The closing tip depends on whether a .env file was found.
    if has_dotenv:
        tip = "\n[dim]💡 Tip: To enable tracing via .env, add: CREWAI_TRACING_ENABLED=true[/dim]"
    else:
        tip = "\n[dim]💡 Tip: Create a .env file in your project root and add: CREWAI_TRACING_ENABLED=true[/dim]"
    console.print(tip)
    console.print()


@crewai.group()
def traces() -> None:
    """Trace collection management commands."""
    # Intentionally empty: subcommands are attached with @traces.command(...).


@traces.command(name="enable")
def traces_enable() -> None:
    """Enable trace collection for crew/flow executions."""
    from rich.console import Console
    from rich.panel import Panel

    console = Console()

    # Record the consent; first_execution_done is set to True alongside it.
    update_user_data({"trace_consent": True, "first_execution_done": True})

    message = (
        "✅ Trace collection enabled.\n\n"
        "Your crew/flow executions will now send traces to CrewAI+.\n"
        "Use 'crewai traces disable' to opt out."
    )
    console.print(
        Panel(
            message,
            title="Traces Enabled",
            border_style="green",
            padding=(1, 2),
        )
    )


@traces.command(name="disable")
def traces_disable() -> None:
    """Disable trace collection for crew/flow executions."""
    from rich.console import Console
    from rich.panel import Panel

    console = Console()

    # Record the opt-out; first_execution_done is set to True alongside it.
    update_user_data({"trace_consent": False, "first_execution_done": True})

    message = (
        "❌ Trace collection disabled.\n\n"
        "Your crew/flow executions will no longer send traces "
        "(unless [bold]CREWAI_TRACING_ENABLED=true[/bold] is set in the environment, "
        "which overrides the opt-out).\n"
        "Use 'crewai traces enable' to opt back in."
    )
    console.print(
        Panel(
            message,
            title="Traces Disabled",
            border_style="red",
            padding=(1, 2),
        )
    )


@traces.command(name="status")
def traces_status() -> None:
    """Show current trace collection status."""
    from rich.console import Console
    from rich.panel import Panel
    from rich.table import Table

    console = Console()
    stored = _load_user_data()

    summary = Table(show_header=False, box=None)
    summary.add_column("Setting", style="cyan")
    summary.add_column("Value", style="white")

    # Raw environment value; displayed as "false" when the variable is unset.
    summary.add_row(
        "CREWAI_TRACING_ENABLED", os.getenv("CREWAI_TRACING_ENABLED", "false")
    )

    # Stored consent is tri-state: True, False, or anything else ("not set").
    consent = stored.get("trace_consent")
    if consent is True:
        consent_label = "✅ Enabled (user consented)"
    elif consent is False:
        consent_label = "❌ Disabled (user declined)"
    else:
        consent_label = "⚪ Not set (first-time user)"
    summary.add_row("User Consent", consent_label)

    # The effective status comes from is_tracing_enabled(); it also picks
    # the panel border colour.
    overall_label, frame_style = (
        ("✅ ENABLED", "green") if is_tracing_enabled() else ("❌ DISABLED", "red")
    )
    summary.add_row("Overall Status", overall_label)

    console.print(
        Panel(
            summary,
            title="Trace Collection Status",
            border_style=frame_style,
            padding=(1, 2),
        )
    )


@crewai.group(invoke_without_command=True)
@click.option(
    "--location", default="./.checkpoints", help="Checkpoint directory or SQLite file."
)
@click.pass_context
def checkpoint(ctx: click.Context, location: str) -> None:
    """Browse and inspect checkpoints. Launches a TUI when called without a subcommand."""
    from crewai_cli.checkpoint_cli import _detect_location

    # Store the detected location on the context for subcommands to read.
    resolved_location = _detect_location(location)
    ctx.ensure_object(dict)
    ctx.obj["location"] = resolved_location

    if ctx.invoked_subcommand is not None:
        return

    # Bare `crewai checkpoint`: open the TUI (imported only when needed).
    from crewai_cli.checkpoint_tui import run_checkpoint_tui

    run_checkpoint_tui(resolved_location)


@checkpoint.command(name="list")
@click.argument("location", default="./.checkpoints")
def checkpoint_list(location: str) -> None:
    """List checkpoints in a directory."""
    from crewai_cli.checkpoint_cli import _detect_location, list_checkpoints

    # Uses its own positional LOCATION argument, not the group's --location.
    resolved_location = _detect_location(location)
    list_checkpoints(resolved_location)


@checkpoint.command(name="info")
@click.argument("path", default="./.checkpoints")
def checkpoint_info(path: str) -> None:
    """Show details of a checkpoint. Pass a file or directory for latest."""
    from crewai_cli.checkpoint_cli import _detect_location, info_checkpoint

    # Uses its own positional PATH argument, not the group's --location.
    resolved_path = _detect_location(path)
    info_checkpoint(resolved_path)


@checkpoint.command(name="resume")
@click.argument("checkpoint_id", required=False, default=None)
@click.pass_context
def checkpoint_resume(ctx: click.Context, checkpoint_id: str | None) -> None:
    """Resume from a checkpoint. Defaults to the most recent."""
    from crewai_cli.checkpoint_cli import resume_checkpoint

    # The location was stored on the context by the `checkpoint` group.
    location = ctx.obj["location"]
    resume_checkpoint(location, checkpoint_id)


@checkpoint.command(name="diff")
@click.argument("id1")
@click.argument("id2")
@click.pass_context
def checkpoint_diff(ctx: click.Context, id1: str, id2: str) -> None:
    """Compare two checkpoints side-by-side."""
    from crewai_cli.checkpoint_cli import diff_checkpoints

    # The location was stored on the context by the `checkpoint` group.
    location = ctx.obj["location"]
    diff_checkpoints(location, id1, id2)


@checkpoint.command(name="prune")
@click.option(
    "--keep", type=int, default=None, help="Keep the N most recent checkpoints."
)
@click.option(
    "--older-than",
    default=None,
    help="Remove checkpoints older than duration (e.g. 7d, 24h, 30m).",
)
@click.option(
    "--dry-run", is_flag=True, help="Show what would be pruned without deleting."
)
@click.pass_context
def checkpoint_prune(
    ctx: click.Context, keep: int | None, older_than: str | None, dry_run: bool
) -> None:
    """Remove old checkpoints."""
    from crewai_cli.checkpoint_cli import prune_checkpoints

    # The location was stored on the context by the `checkpoint` group; the
    # option values are passed through unchanged (None when omitted).
    location = ctx.obj["location"]
    prune_checkpoints(location, keep, older_than, dry_run)


@crewai.command()
@click.argument("agent_path", type=click.Path(exists=True))
@click.argument("cases_path", type=click.Path(exists=True))
@click.option(
    "--models",
    "-m",
    multiple=True,
    help="Models to compare; repeat the flag for each model "
    "(e.g., -m openai/gpt-4o -m openai/gpt-4o-mini)",
)
@click.option(
    "--judge-model",
    default=None,
    help="Model for LLM judge evaluation. "
    "Defaults to test.judge_model in config.json (openai/gpt-4o-mini if not set).",
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    help="Show agent execution details (tool calls, LLM responses, errors).",
)
def benchmark(
    agent_path: str,
    cases_path: str,
    models: tuple[str, ...],
    judge_model: str | None,
    verbose: bool,
) -> None:
    """Run agent against test cases and report results."""
    # NOTE: the docstring above doubles as this command's `--help` text, so
    # the detailed documentation lives in comments instead.
    #
    # Parameters (all supplied by click):
    #   agent_path  -- existing path to the agent definition.
    #   cases_path  -- existing path to the benchmark cases.
    #   models      -- zero or more model names (one per `-m` occurrence);
    #                  empty means `run_benchmark` receives `models=None`.
    #   judge_model -- judge model override; falls back to config, then to
    #                  "openai/gpt-4o-mini".
    #   verbose     -- show execution details instead of the live progress.
    #
    # Exits with status 1 (SystemExit) when the configured timeout is not an
    # integer, when the cases cannot be loaded, or when the run itself fails.
    import asyncio
    from pathlib import Path as _P

    from crewai_cli.run_crew import _needs_uv_relaunch, _relaunch_via_uv

    # Judge model precedence: CLI flag, then config value, then the default.
    judge_model = (
        judge_model or _read_config("test", "judge_model") or "openai/gpt-4o-mini"
    )

    config_timeout = _read_config("test", "case_timeout")
    if config_timeout is None:
        effective_timeout = 90
    else:
        try:
            effective_timeout = int(config_timeout)
        except (TypeError, ValueError, OverflowError) as e:
            # A malformed config value should produce a readable error, not a
            # traceback.
            click.secho(
                f"Invalid test.case_timeout in config: {config_timeout!r} "
                "(expected an integer)",
                fg="red",
            )
            raise SystemExit(1) from e

    if _needs_uv_relaunch():
        # Rebuild the equivalent command line for the relaunched process.
        # NOTE(review): `_relaunch_via_uv` presumably does not return (it
        # re-executes under uv) -- confirm against crewai_cli.run_crew.
        uv_args = ["benchmark", agent_path, cases_path, "--judge-model", judge_model]
        for m in models:
            uv_args.extend(["-m", m])
        if verbose:
            uv_args.append("-v")
        _relaunch_via_uv(uv_args)

    # These imports intentionally stay below the relaunch check, matching the
    # original ordering.
    from rich.console import Console as _RichConsole

    from crewai_cli.benchmark import (
        ArtifactsSandbox,
        SuppressBenchmarkOutput,
        VerboseBenchmarkOutput,
        load_benchmark_cases,
        print_comparison_chart,
        print_results_chart,
        run_benchmark,
    )

    _con = _RichConsole()

    agent_path = str(_P(agent_path).resolve())
    cases_path = str(_P(cases_path).resolve())

    try:
        cases = load_benchmark_cases(cases_path)
    except (FileNotFoundError, ValueError) as e:
        click.secho(f"Error loading benchmark cases: {e}", fg="red")
        raise SystemExit(1) from e

    agent_name = _P(agent_path).stem
    model_list = list(models) if models else None
    models_str = ", ".join(model_list) if model_list else "default"
    click.echo()
    _con.print(
        f"[bold cyan]Benchmarking[/bold cyan] [bold]{agent_name}[/bold]  "
        f"[dim]{len(cases)} cases · judge {judge_model} · models: {models_str}[/dim]"
    )
    click.echo()

    # Live progress is only used in non-verbose mode; verbose mode passes no
    # progress callback.
    progress = None if verbose else _BenchmarkLiveProgress(console=_con)
    _loop = asyncio.new_event_loop()
    asyncio.set_event_loop(_loop)
    try:
        if progress:
            progress.start()
        output_ctx = VerboseBenchmarkOutput() if verbose else SuppressBenchmarkOutput()
        with ArtifactsSandbox(), output_ctx:
            results_by_model = _loop.run_until_complete(
                run_benchmark(
                    agent_def=agent_path,
                    cases=cases,
                    models=model_list,
                    judge_model=judge_model,
                    on_progress=progress.on_progress if progress else None,
                    verbose=verbose,
                    case_timeout=effective_timeout,
                )
            )
    except Exception as e:
        click.secho(f"Error running benchmark: {e}", fg="red")
        raise SystemExit(1) from e
    finally:
        try:
            if progress:
                progress.stop()
        finally:
            # Always release the loop, even if stopping the progress display
            # fails, and uninstall it first so later code cannot pick up a
            # closed loop as the current one.
            asyncio.set_event_loop(None)
            _loop.close()

    # One model gets the single-run chart; several get the comparison chart.
    # An empty result prints no chart.
    if len(results_by_model) == 1:
        _single_results = next(iter(results_by_model.values()))
        print_results_chart(_single_results, console=_con)
    elif len(results_by_model) > 1:
        _con.print()
        print_comparison_chart(results_by_model, console=_con)

    saved = _save_run_results(
        results_by_model,
        command="benchmark",
        judge_model=judge_model,
    )
    _con.print(f"\n  [dim]Results saved to {saved}[/dim]")


# Script entry point: when this module is executed directly (rather than
# imported), invoke the root `crewai` command.
if __name__ == "__main__":
    crewai()