From a723d991f5aa6ce71821f369fc8e2c497b15fb76 Mon Sep 17 00:00:00 2001
From: alex-clawd <alex@crewai.com>
Date: Wed, 13 May 2026 09:03:14 -0700
Subject: [PATCH] fix: address three review comments on benchmark/test CLI

- benchmark verbose path: pass on_progress callback the same way as
  the non-verbose path (was missing entirely)
- _train_new_agents: replace per-case asyncio.run() with a single
  event loop (new_event_loop / set_event_loop / run_until_complete /
  close) to avoid creating and destroying a loop on every case
  iteration
- format_results_table: use case_index + 1 so the '#' column is
  1-based, matching the case numbers shown in the failed-case output
  of _test_new_agents

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lib/cli/src/crewai_cli/benchmark.py | 2 +-
 lib/cli/src/crewai_cli/cli.py       | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/cli/src/crewai_cli/benchmark.py b/lib/cli/src/crewai_cli/benchmark.py
index c16cd733b..78df4530d 100644
--- a/lib/cli/src/crewai_cli/benchmark.py
+++ b/lib/cli/src/crewai_cli/benchmark.py
@@ -676,7 +676,7 @@ def format_results_table(results: list[BenchmarkResult]) -> str:
         status = "PASS" if r.passed else "FAIL"
         tokens = f"{r.input_tokens}/{r.output_tokens}"
         input_trunc = r.input[:40] + "..." if len(r.input) > 40 else r.input
-        line = f"{r.case_index:<4} {status:<6} {r.score:<7.2f} {tokens:<12} {r.response_time_ms:<10} {input_trunc}"
+        line = f"{r.case_index + 1:<4} {status:<6} {r.score:<7.2f} {tokens:<12} {r.response_time_ms:<10} {input_trunc}"
         lines.append(line)
 
         if r.passed:
diff --git a/lib/cli/src/crewai_cli/cli.py b/lib/cli/src/crewai_cli/cli.py
index 47cd3b448..2ec1520ad 100644
--- a/lib/cli/src/crewai_cli/cli.py
+++ b/lib/cli/src/crewai_cli/cli.py
@@ -243,6 +243,8 @@ def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None:
         from rich.console import Console as _Console
 
         _console = _Console()
+        _loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(_loop)
 
         for iteration in range(n_iterations):
             click.secho(f"\n  Iteration {iteration + 1}/{n_iterations}", fg="cyan")
@@ -256,7 +258,7 @@ def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None:
 
                     _t0 = _time.monotonic()
                     with _console.status("[cyan]  Running…[/]", spinner="dots"):
-                        response = asyncio.run(agent.amessage(user_input))
+                        response = _loop.run_until_complete(agent.amessage(user_input))
                     _elapsed = _time.monotonic() - _t0
                     _console.print(f"  [green]✓[/] done ({_elapsed:.1f}s)")
                     click.echo(f"  Response: {response.content[:500]}")
@@ -279,6 +281,7 @@ def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None:
                     )
                     click.secho("  ✓ Feedback saved as canonical memory", fg="green")
 
+        _loop.close()
         agents_trained += 1
 
     click.echo()
@@ -1755,6 +1758,7 @@ def benchmark(
                             cases=cases,
                             models=model_list,
                             judge_model=judge_model,
+                            on_progress=progress.on_progress if progress else None,
                             verbose=verbose,
                         )
                     )