fix: resolve all mypy type errors in CLI files

Add missing type annotations to the benchmark.py context managers, event
handlers, and LoadedCases iteration methods, and fix the union-attr error
on BaseLLM.
Fix no-any-return errors in agent_tui.py and make action_quit async to
match the Textual App supertype. Add type annotations to
_BenchmarkLiveProgress methods in cli.py and fix icon redefinition.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
alex-clawd
2026-05-13 07:10:07 -07:00
parent 27fd105ad6
commit 68fb64f383
3 changed files with 44 additions and 40 deletions

View File

@@ -139,7 +139,7 @@ def _load_config(base: Path) -> dict[str, Any]:
return {"rooms": {"common": {"agents": [], "engagement": "organic"}}}
try:
raw = config_path.read_text(encoding="utf-8")
return json.loads(_strip_jsonc(raw))
return json.loads(_strip_jsonc(raw)) # type: ignore[no-any-return]
except Exception:
return {"rooms": {"common": {"agents": [], "engagement": "organic"}}}
@@ -559,12 +559,12 @@ class AgentTUI(App[None]):
def _room_engagement(self, room_key: str) -> str:
if room_key in self._rooms:
return self._rooms[room_key].get("engagement", "organic")
return str(self._rooms[room_key].get("engagement", "organic"))
return "organic"
def _room_agents(self, room_key: str) -> list[str]:
if room_key in self._rooms:
return self._rooms[room_key].get("agents", self._agent_names[:])
return list(self._rooms[room_key].get("agents", self._agent_names[:]))
return self._agent_names[:]
def on_mount(self) -> None:
@@ -1679,7 +1679,7 @@ class AgentTUI(App[None]):
# ── Actions ──
def action_quit(self) -> None:
async def action_quit(self) -> None:
"""Graceful shutdown: stop scheduler, silence event bus, then exit."""
self._mount_sys("Shutting down...")
if self._scheduler:

View File

@@ -3,7 +3,7 @@
from __future__ import annotations
import asyncio
from collections.abc import Callable
from collections.abc import Callable, Iterator
import json
from pathlib import Path
import re
@@ -49,10 +49,10 @@ class LoadedCases:
def __len__(self) -> int:
return len(self.cases)
def __iter__(self):
def __iter__(self) -> Iterator[BenchmarkCase]:
return iter(self.cases)
def __getitem__(self, index):
def __getitem__(self, index: int) -> BenchmarkCase:
return self.cases[index]
@@ -136,6 +136,7 @@ async def _judge_with_llm(
from crewai.utilities.llm_utils import create_llm
judge_llm = create_llm(judge_model)
assert judge_llm is not None
prompt = (
"You are an evaluation judge. Score the following response on a scale of 0.0 to 1.0.\n\n"
@@ -444,7 +445,7 @@ async def run_benchmark(
class SuppressBenchmarkOutput:
"""Context manager that silences console formatter and noisy logging during benchmarks."""
def __enter__(self):
def __enter__(self) -> "SuppressBenchmarkOutput":
import logging
self._saved_formatter = None
@@ -453,7 +454,7 @@ class SuppressBenchmarkOutput:
TraceCollectionListener,
)
listener = TraceCollectionListener._instance
listener = TraceCollectionListener._instance # type: ignore[misc]
if listener:
self._saved_formatter = listener.formatter
listener.formatter = None
@@ -471,7 +472,7 @@ class SuppressBenchmarkOutput:
lg.setLevel(logging.CRITICAL)
return self
def __exit__(self, *exc):
def __exit__(self, *exc: object) -> None:
for lg, level in self._loggers:
lg.setLevel(level)
if self._saved_formatter is not None:
@@ -480,7 +481,7 @@ class SuppressBenchmarkOutput:
TraceCollectionListener,
)
listener = TraceCollectionListener._instance
listener = TraceCollectionListener._instance # type: ignore[misc]
if listener:
listener.formatter = self._saved_formatter
except Exception:
@@ -490,7 +491,7 @@ class SuppressBenchmarkOutput:
class VerboseBenchmarkOutput:
"""Context manager that subscribes to NewAgent events and prints them for debugging."""
def __enter__(self):
def __enter__(self) -> "VerboseBenchmarkOutput":
import logging
import sys
@@ -513,7 +514,7 @@ class VerboseBenchmarkOutput:
TraceCollectionListener,
)
listener = TraceCollectionListener._instance
listener = TraceCollectionListener._instance # type: ignore[misc]
if listener:
self._saved_formatter = listener.formatter
listener.formatter = None
@@ -537,38 +538,38 @@ class VerboseBenchmarkOutput:
w = sys.stderr.write
fl = sys.stderr.flush
def _on_llm_start(_src, ev: NewAgentLLMCallStartedEvent):
def _on_llm_start(_src: Any, ev: NewAgentLLMCallStartedEvent) -> None:
w(f"\033[36m[llm] calling {ev.model}\033[0m\n")
fl()
def _on_llm_done(_src, ev: NewAgentLLMCallCompletedEvent):
def _on_llm_done(_src: Any, ev: NewAgentLLMCallCompletedEvent) -> None:
w(
f"\033[36m[llm] {ev.model} {ev.input_tokens}{ev.output_tokens} tokens {ev.response_time_ms}ms\033[0m\n"
)
fl()
def _on_llm_fail(_src, ev: NewAgentLLMCallFailedEvent):
def _on_llm_fail(_src: Any, ev: NewAgentLLMCallFailedEvent) -> None:
w(f"\033[31m[llm] FAILED: {ev.error[:200]}\033[0m\n")
fl()
def _on_tool_start(_src, ev: NewAgentToolUsageStartedEvent):
def _on_tool_start(_src: Any, ev: NewAgentToolUsageStartedEvent) -> None:
w(f"\033[33m[tool] using {ev.tool_name}\033[0m\n")
fl()
def _on_tool_done(_src, ev: NewAgentToolUsageCompletedEvent):
def _on_tool_done(_src: Any, ev: NewAgentToolUsageCompletedEvent) -> None:
w(f"\033[33m[tool] {ev.tool_name} done\033[0m\n")
fl()
def _on_tool_fail(_src, ev: NewAgentToolUsageFailedEvent):
def _on_tool_fail(_src: Any, ev: NewAgentToolUsageFailedEvent) -> None:
w(f"\033[31m[tool] {ev.tool_name} FAILED: {ev.error[:200]}\033[0m\n")
fl()
def _on_status(_src, ev: NewAgentStatusUpdateEvent):
def _on_status(_src: Any, ev: NewAgentStatusUpdateEvent) -> None:
if ev.detail:
w(f"\033[2m[status] {ev.state}: {ev.detail}\033[0m\n")
fl()
def _on_summarized(_src, ev: NewAgentContextSummarizedEvent):
def _on_summarized(_src: Any, ev: NewAgentContextSummarizedEvent) -> None:
w("\033[35m[context] summarized — context was too large\033[0m\n")
fl()
@@ -583,14 +584,14 @@ class VerboseBenchmarkOutput:
(NewAgentContextSummarizedEvent, _on_summarized),
]
for event_type, handler in pairs:
self._bus.on(event_type)(handler)
self._bus.on(event_type)(handler) # type: ignore[arg-type]
self._handlers.append((event_type, handler))
return self
def __exit__(self, *exc):
def __exit__(self, *exc: object) -> None:
for event_type, handler in self._handlers:
try:
self._bus.off(event_type, handler)
self._bus.off(event_type, handler) # type: ignore[arg-type]
except Exception:
pass
for lg, level in self._loggers:
@@ -601,7 +602,7 @@ class VerboseBenchmarkOutput:
TraceCollectionListener,
)
listener = TraceCollectionListener._instance
listener = TraceCollectionListener._instance # type: ignore[misc]
if listener:
listener.formatter = self._saved_formatter
except Exception:
@@ -619,7 +620,7 @@ class ArtifactsSandbox:
self._base = Path(base)
self._prev_cwd: str | None = None
def __enter__(self):
def __enter__(self) -> "ArtifactsSandbox":
import os
self._base.mkdir(parents=True, exist_ok=True)
@@ -630,7 +631,7 @@ class ArtifactsSandbox:
os.chdir(self._base)
return self
def __exit__(self, *exc):
def __exit__(self, *exc: object) -> None:
import os
if self._prev_cwd:

View File

@@ -198,7 +198,7 @@ def train(n_iterations: int, filename: str) -> None:
train_crew(n_iterations, filename)
def _train_new_agents(agent_files: list, n_iterations: int) -> None:
def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None:
"""Run interactive training for NewAgent agents.
For each agent, loads benchmark cases, runs them, shows the response,
@@ -631,14 +631,14 @@ def _read_config(*keys: str) -> Any:
class _BenchmarkLiveProgress:
"""Live parallel progress display for benchmark runs."""
def __init__(self, console=None):
def __init__(self, console: Any = None) -> None:
from rich.console import Console
self._console = console or Console()
self._state: dict[str, dict] = {}
self._live = None
self._state: dict[str, dict[str, Any]] = {}
self._live: Any = None
def start(self):
def start(self) -> None:
from rich.live import Live
self._live = Live(
@@ -649,13 +649,13 @@ class _BenchmarkLiveProgress:
)
self._live.start()
def stop(self):
def stop(self) -> None:
if self._live:
self._live.update(self._render())
self._live.stop()
self._live = None
def on_progress(self, event: dict) -> None:
def on_progress(self, event: dict[str, Any]) -> None:
t = event["type"]
model = event.get("model", "")
@@ -695,7 +695,7 @@ class _BenchmarkLiveProgress:
if self._live:
self._live.update(self._render())
def _render(self):
def _render(self) -> Any:
from rich import box
from rich.spinner import Spinner
from rich.table import Table
@@ -721,6 +721,7 @@ class _BenchmarkLiveProgress:
table.add_column("", no_wrap=True, justify="right") # cost
for model, info in self._state.items():
icon: Any
if info["status"] == "done":
icon = Text("", style="green")
color = _score_color(info["avg"])
@@ -758,7 +759,7 @@ class _BenchmarkLiveProgress:
def _test_new_agents(
agent_files: list,
agent_files: list[Any],
n_iterations: int,
model: str | None,
threshold: float,
@@ -782,7 +783,7 @@ def _test_new_agents(
tests_dir = Path("benchmarks")
# Collect valid agents + cases
jobs: list[dict] = []
jobs: list[dict[str, Any]] = []
for agent_path in agent_files:
agent_name = agent_path.stem
cases_path = tests_dir / f"{agent_name}_cases.json"
@@ -816,8 +817,8 @@ def _test_new_agents(
# Progress display — prefix model key with agent name
progress = None if verbose else _BenchmarkLiveProgress(console=_con)
def _make_progress_cb(agent_name: str):
def _cb(event: dict) -> None:
def _make_progress_cb(agent_name: str) -> Any:
def _cb(event: dict[str, Any]) -> None:
if progress is not None:
prefixed = dict(event)
if "model" in prefixed:
@@ -826,7 +827,7 @@ def _test_new_agents(
return _cb
async def _run_all():
async def _run_all() -> Any:
tasks = []
for job in jobs:
tasks.append(
@@ -858,6 +859,7 @@ def _test_new_agents(
)
if not verbose:
assert progress is not None
progress.start()
try:
with ArtifactsSandbox():
@@ -869,6 +871,7 @@ def _test_new_agents(
all_results = asyncio.run(_run_all())
finally:
if not verbose:
assert progress is not None
progress.stop()
# Evaluate results