From a723d991f5aa6ce71821f369fc8e2c497b15fb76 Mon Sep 17 00:00:00 2001 From: alex-clawd Date: Wed, 13 May 2026 09:03:14 -0700 Subject: [PATCH] fix: address three review comments on benchmark/test CLI - benchmark verbose path: pass on_progress callback the same way as the non-verbose path (was missing entirely) - _train_new_agents: replace per-case asyncio.run() with a single event loop (new_event_loop / run_until_complete / close) to avoid creating and destroying a loop on every case iteration - format_results_table: use case_index + 1 so the '#' column is 1-based, matching the display in _test_new_agents failed output Co-Authored-By: Claude Sonnet 4.6 --- lib/cli/src/crewai_cli/benchmark.py | 2 +- lib/cli/src/crewai_cli/cli.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/cli/src/crewai_cli/benchmark.py b/lib/cli/src/crewai_cli/benchmark.py index c16cd733b..78df4530d 100644 --- a/lib/cli/src/crewai_cli/benchmark.py +++ b/lib/cli/src/crewai_cli/benchmark.py @@ -676,7 +676,7 @@ def format_results_table(results: list[BenchmarkResult]) -> str: status = "PASS" if r.passed else "FAIL" tokens = f"{r.input_tokens}/{r.output_tokens}" input_trunc = r.input[:40] + "..." if len(r.input) > 40 else r.input - line = f"{r.case_index:<4} {status:<6} {r.score:<7.2f} {tokens:<12} {r.response_time_ms:<10} {input_trunc}" + line = f"{r.case_index + 1:<4} {status:<6} {r.score:<7.2f} {tokens:<12} {r.response_time_ms:<10} {input_trunc}" lines.append(line) if r.passed: diff --git a/lib/cli/src/crewai_cli/cli.py b/lib/cli/src/crewai_cli/cli.py index 47cd3b448..2ec1520ad 100644 --- a/lib/cli/src/crewai_cli/cli.py +++ b/lib/cli/src/crewai_cli/cli.py @@ -243,6 +243,8 @@ def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None: from rich.console import Console as _Console _console = _Console() + _loop = asyncio.new_event_loop() + asyncio.set_event_loop(_loop) for iteration in range(n_iterations): click.secho(f"\n Iteration {iteration + 1}/{n_iterations}", fg="cyan") @@ -256,7 +258,7 @@ def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None: _t0 = _time.monotonic() with _console.status("[cyan] Running…[/]", spinner="dots"): - response = asyncio.run(agent.amessage(user_input)) + response = _loop.run_until_complete(agent.amessage(user_input)) _elapsed = _time.monotonic() - _t0 _console.print(f" [green]✓[/] done ({_elapsed:.1f}s)") click.echo(f" Response: {response.content[:500]}") @@ -279,6 +281,7 @@ def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None: ) click.secho(" ✓ Feedback saved as canonical memory", fg="green") + _loop.close() agents_trained += 1 click.echo() @@ -1755,6 +1758,7 @@ def benchmark( cases=cases, models=model_list, judge_model=judge_model, + on_progress=progress.on_progress if progress else None, verbose=verbose, ) )