diff --git a/lib/cli/src/crewai_cli/agent_tui.py b/lib/cli/src/crewai_cli/agent_tui.py index 2749ed298..d8e4a31b2 100644 --- a/lib/cli/src/crewai_cli/agent_tui.py +++ b/lib/cli/src/crewai_cli/agent_tui.py @@ -139,7 +139,7 @@ def _load_config(base: Path) -> dict[str, Any]: return {"rooms": {"common": {"agents": [], "engagement": "organic"}}} try: raw = config_path.read_text(encoding="utf-8") - return json.loads(_strip_jsonc(raw)) + return json.loads(_strip_jsonc(raw)) # type: ignore[no-any-return] except Exception: return {"rooms": {"common": {"agents": [], "engagement": "organic"}}} @@ -559,12 +559,12 @@ class AgentTUI(App[None]): def _room_engagement(self, room_key: str) -> str: if room_key in self._rooms: - return self._rooms[room_key].get("engagement", "organic") + return str(self._rooms[room_key].get("engagement", "organic")) return "organic" def _room_agents(self, room_key: str) -> list[str]: if room_key in self._rooms: - return self._rooms[room_key].get("agents", self._agent_names[:]) + return list(self._rooms[room_key].get("agents", self._agent_names[:])) return self._agent_names[:] def on_mount(self) -> None: @@ -1679,7 +1679,7 @@ class AgentTUI(App[None]): # ── Actions ── - def action_quit(self) -> None: + async def action_quit(self) -> None: """Graceful shutdown: stop scheduler, silence event bus, then exit.""" self._mount_sys("Shutting down...") if self._scheduler: diff --git a/lib/cli/src/crewai_cli/benchmark.py b/lib/cli/src/crewai_cli/benchmark.py index 89229ace7..9d0ca9127 100644 --- a/lib/cli/src/crewai_cli/benchmark.py +++ b/lib/cli/src/crewai_cli/benchmark.py @@ -3,7 +3,7 @@ from __future__ import annotations import asyncio -from collections.abc import Callable +from collections.abc import Callable, Iterator import json from pathlib import Path import re @@ -49,10 +49,10 @@ class LoadedCases: def __len__(self) -> int: return len(self.cases) - def __iter__(self): + def __iter__(self) -> Iterator[BenchmarkCase]: return iter(self.cases) - def __getitem__(self, index): + def __getitem__(self, index: int) -> BenchmarkCase: return self.cases[index] @@ -136,6 +136,7 @@ async def _judge_with_llm( from crewai.utilities.llm_utils import create_llm judge_llm = create_llm(judge_model) + assert judge_llm is not None prompt = ( "You are an evaluation judge. Score the following response on a scale of 0.0 to 1.0.\n\n" @@ -444,7 +445,7 @@ async def run_benchmark( class SuppressBenchmarkOutput: """Context manager that silences console formatter and noisy logging during benchmarks.""" - def __enter__(self): + def __enter__(self) -> "SuppressBenchmarkOutput": import logging self._saved_formatter = None @@ -453,7 +454,7 @@ class SuppressBenchmarkOutput: TraceCollectionListener, ) - listener = TraceCollectionListener._instance + listener = TraceCollectionListener._instance # type: ignore[misc] if listener: self._saved_formatter = listener.formatter listener.formatter = None @@ -471,7 +472,7 @@ class SuppressBenchmarkOutput: lg.setLevel(logging.CRITICAL) return self - def __exit__(self, *exc): + def __exit__(self, *exc: object) -> None: for lg, level in self._loggers: lg.setLevel(level) if self._saved_formatter is not None: @@ -480,7 +481,7 @@ class SuppressBenchmarkOutput: TraceCollectionListener, ) - listener = TraceCollectionListener._instance + listener = TraceCollectionListener._instance # type: ignore[misc] if listener: listener.formatter = self._saved_formatter except Exception: @@ -490,7 +491,7 @@ class SuppressBenchmarkOutput: class VerboseBenchmarkOutput: """Context manager that subscribes to NewAgent events and prints them for debugging.""" - def __enter__(self): + def __enter__(self) -> "VerboseBenchmarkOutput": import logging import sys @@ -513,7 +514,7 @@ class VerboseBenchmarkOutput: TraceCollectionListener, ) - listener = TraceCollectionListener._instance + listener = TraceCollectionListener._instance # type: ignore[misc] if listener: self._saved_formatter = listener.formatter listener.formatter = None @@ -537,38 +538,38 @@ class VerboseBenchmarkOutput: w = sys.stderr.write fl = sys.stderr.flush - def _on_llm_start(_src, ev: NewAgentLLMCallStartedEvent): + def _on_llm_start(_src: Any, ev: NewAgentLLMCallStartedEvent) -> None: w(f"\033[36m[llm] calling {ev.model}…\033[0m\n") fl() - def _on_llm_done(_src, ev: NewAgentLLMCallCompletedEvent): + def _on_llm_done(_src: Any, ev: NewAgentLLMCallCompletedEvent) -> None: w( f"\033[36m[llm] {ev.model} {ev.input_tokens}→{ev.output_tokens} tokens {ev.response_time_ms}ms\033[0m\n" ) fl() - def _on_llm_fail(_src, ev: NewAgentLLMCallFailedEvent): + def _on_llm_fail(_src: Any, ev: NewAgentLLMCallFailedEvent) -> None: w(f"\033[31m[llm] FAILED: {ev.error[:200]}\033[0m\n") fl() - def _on_tool_start(_src, ev: NewAgentToolUsageStartedEvent): + def _on_tool_start(_src: Any, ev: NewAgentToolUsageStartedEvent) -> None: w(f"\033[33m[tool] using {ev.tool_name}…\033[0m\n") fl() - def _on_tool_done(_src, ev: NewAgentToolUsageCompletedEvent): + def _on_tool_done(_src: Any, ev: NewAgentToolUsageCompletedEvent) -> None: w(f"\033[33m[tool] {ev.tool_name} done\033[0m\n") fl() - def _on_tool_fail(_src, ev: NewAgentToolUsageFailedEvent): + def _on_tool_fail(_src: Any, ev: NewAgentToolUsageFailedEvent) -> None: w(f"\033[31m[tool] {ev.tool_name} FAILED: {ev.error[:200]}\033[0m\n") fl() - def _on_status(_src, ev: NewAgentStatusUpdateEvent): + def _on_status(_src: Any, ev: NewAgentStatusUpdateEvent) -> None: if ev.detail: w(f"\033[2m[status] {ev.state}: {ev.detail}\033[0m\n") fl() - def _on_summarized(_src, ev: NewAgentContextSummarizedEvent): + def _on_summarized(_src: Any, ev: NewAgentContextSummarizedEvent) -> None: w("\033[35m[context] summarized — context was too large\033[0m\n") fl() @@ -583,14 +584,14 @@ class VerboseBenchmarkOutput: (NewAgentContextSummarizedEvent, _on_summarized), ] for event_type, handler in pairs: - self._bus.on(event_type)(handler) + self._bus.on(event_type)(handler) # type: ignore[arg-type] self._handlers.append((event_type, handler)) return self - def __exit__(self, *exc): + def __exit__(self, *exc: object) -> None: for event_type, handler in self._handlers: try: - self._bus.off(event_type, handler) + self._bus.off(event_type, handler) # type: ignore[arg-type] except Exception: pass for lg, level in self._loggers: @@ -601,7 +602,7 @@ class VerboseBenchmarkOutput: TraceCollectionListener, ) - listener = TraceCollectionListener._instance + listener = TraceCollectionListener._instance # type: ignore[misc] if listener: listener.formatter = self._saved_formatter except Exception: @@ -619,7 +620,7 @@ class ArtifactsSandbox: self._base = Path(base) self._prev_cwd: str | None = None - def __enter__(self): + def __enter__(self) -> "ArtifactsSandbox": import os self._base.mkdir(parents=True, exist_ok=True) @@ -630,7 +631,7 @@ class ArtifactsSandbox: os.chdir(self._base) return self - def __exit__(self, *exc): + def __exit__(self, *exc: object) -> None: import os if self._prev_cwd: diff --git a/lib/cli/src/crewai_cli/cli.py b/lib/cli/src/crewai_cli/cli.py index 183aa1bf8..b494dae8d 100644 --- a/lib/cli/src/crewai_cli/cli.py +++ b/lib/cli/src/crewai_cli/cli.py @@ -198,7 +198,7 @@ def train(n_iterations: int, filename: str) -> None: train_crew(n_iterations, filename) -def _train_new_agents(agent_files: list, n_iterations: int) -> None: +def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None: """Run interactive training for NewAgent agents. For each agent, loads benchmark cases, runs them, shows the response, @@ -631,14 +631,14 @@ def _read_config(*keys: str) -> Any: class _BenchmarkLiveProgress: """Live parallel progress display for benchmark runs.""" - def __init__(self, console=None): + def __init__(self, console: Any = None) -> None: from rich.console import Console self._console = console or Console() - self._state: dict[str, dict] = {} - self._live = None + self._state: dict[str, dict[str, Any]] = {} + self._live: Any = None - def start(self): + def start(self) -> None: from rich.live import Live self._live = Live( @@ -649,13 +649,13 @@ class _BenchmarkLiveProgress: ) self._live.start() - def stop(self): + def stop(self) -> None: if self._live: self._live.update(self._render()) self._live.stop() self._live = None - def on_progress(self, event: dict) -> None: + def on_progress(self, event: dict[str, Any]) -> None: t = event["type"] model = event.get("model", "") @@ -695,7 +695,7 @@ class _BenchmarkLiveProgress: if self._live: self._live.update(self._render()) - def _render(self): + def _render(self) -> Any: from rich import box from rich.spinner import Spinner from rich.table import Table @@ -721,6 +721,7 @@ class _BenchmarkLiveProgress: table.add_column("", no_wrap=True, justify="right") # cost for model, info in self._state.items(): + icon: Any if info["status"] == "done": icon = Text("✓", style="green") color = _score_color(info["avg"]) @@ -758,7 +759,7 @@ class _BenchmarkLiveProgress: def _test_new_agents( - agent_files: list, + agent_files: list[Any], n_iterations: int, model: str | None, threshold: float, @@ -782,7 +783,7 @@ def _test_new_agents( tests_dir = Path("benchmarks") # Collect valid agents + cases - jobs: list[dict] = [] + jobs: list[dict[str, Any]] = [] for agent_path in agent_files: agent_name = agent_path.stem cases_path = tests_dir / f"{agent_name}_cases.json" @@ -816,8 +817,8 @@ def _test_new_agents( # Progress display — prefix model key with agent name progress = None if verbose else _BenchmarkLiveProgress(console=_con) - def _make_progress_cb(agent_name: str): - def _cb(event: dict) -> None: + def _make_progress_cb(agent_name: str) -> Any: + def _cb(event: dict[str, Any]) -> None: if progress is not None: prefixed = dict(event) if "model" in prefixed: @@ -826,7 +827,7 @@ def _test_new_agents( return _cb - async def _run_all(): + async def _run_all() -> Any: tasks = [] for job in jobs: tasks.append( @@ -858,6 +859,7 @@ def _test_new_agents( ) if not verbose: + assert progress is not None progress.start() try: with ArtifactsSandbox(): @@ -869,6 +871,7 @@ def _test_new_agents( all_results = asyncio.run(_run_all()) finally: if not verbose: + assert progress is not None progress.stop() # Evaluate results