From 2d82896d71f6da0b4007fe352aa1f162a2132cd2 Mon Sep 17 00:00:00 2001 From: Joao Moura Date: Thu, 14 May 2026 16:48:17 -0400 Subject: [PATCH] =?UTF-8?q?fix:=20address=20PR=20review=20comments=20?= =?UTF-8?q?=E2=80=94=20lint,=20threshold,=20dedup,=20agents=5Fdir?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove redundant local `import asyncio` in executor.py that caused ruff F823 (local variable referenced before assignment) - Clear progress state before creating Live display (fixes flash) - Use threshold-based passed in _save_run_results so persisted results match CLI output - Pass agents_dir to load_agent_from_definition in _train_new_agents so coworker references resolve correctly - Deduplicate verbose/non-verbose benchmark execution blocks into single context-manager expression Co-Authored-By: Claude Opus 4.6 --- lib/cli/src/crewai_cli/cli.py | 61 ++++++++------------- lib/crewai/src/crewai/new_agent/executor.py | 2 - 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/lib/cli/src/crewai_cli/cli.py b/lib/cli/src/crewai_cli/cli.py index f77694518..7ea58b6c2 100644 --- a/lib/cli/src/crewai_cli/cli.py +++ b/lib/cli/src/crewai_cli/cli.py @@ -235,7 +235,9 @@ def _train_new_agents(agent_files: list[Any], n_iterations: int) -> None: try: from crewai.new_agent.definition_parser import load_agent_from_definition - agent = load_agent_from_definition(str(agent_path)) + agent = load_agent_from_definition( + str(agent_path), agents_dir=str(agent_path.parent) + ) except Exception as e: click.secho(f" Error loading agent {agent_name}: {e}", fg="red") continue @@ -681,12 +683,15 @@ def _save_run_results( cases: list[dict[str, Any]] = [] for r in result_list: + effective_passed = ( + r.score >= threshold if threshold is not None else r.passed + ) case: dict[str, Any] = { "case": r.case_index + 1, "input": r.input, "output": r.actual, "score": r.score, - "passed": r.passed, + "passed": effective_passed, "time_ms": r.response_time_ms, "input_tokens": r.input_tokens, "output_tokens": r.output_tokens, @@ -730,13 +735,13 @@ class _BenchmarkLiveProgress: from rich.live import Live self._current_iteration = iteration + self._state.clear() self._live = Live( self._render(), console=self._console, refresh_per_second=10, transient=True, ) - self._state.clear() self._live.start() def stop(self) -> None: @@ -987,13 +992,9 @@ def _test_new_agents( if progress is None: raise RuntimeError("progress must not be None in non-verbose mode") progress.start(iteration=iteration) - with ArtifactsSandbox(): - if verbose: - with VerboseBenchmarkOutput(): - all_results = _loop.run_until_complete(_run_all()) - else: - with SuppressBenchmarkOutput(): - all_results = _loop.run_until_complete(_run_all()) + output_ctx = VerboseBenchmarkOutput() if verbose else SuppressBenchmarkOutput() + with ArtifactsSandbox(), output_ctx: + all_results = _loop.run_until_complete(_run_all()) finally: if not verbose: if progress is None: @@ -1957,33 +1958,19 @@ def benchmark( try: if progress: progress.start() - with ArtifactsSandbox(): - if verbose: - with VerboseBenchmarkOutput(): - results_by_model = _loop.run_until_complete( - run_benchmark( - agent_def=agent_path, - cases=cases, - models=model_list, - judge_model=judge_model, - on_progress=progress.on_progress if progress else None, - verbose=verbose, - case_timeout=effective_timeout, - ) - ) - else: - with SuppressBenchmarkOutput(): - results_by_model = _loop.run_until_complete( - run_benchmark( - agent_def=agent_path, - cases=cases, - models=model_list, - judge_model=judge_model, - on_progress=progress.on_progress if progress else None, - verbose=verbose, - case_timeout=effective_timeout, - ) - ) + output_ctx = VerboseBenchmarkOutput() if verbose else SuppressBenchmarkOutput() + with ArtifactsSandbox(), output_ctx: + results_by_model = _loop.run_until_complete( + run_benchmark( + agent_def=agent_path, + cases=cases, + models=model_list, + judge_model=judge_model, + on_progress=progress.on_progress if progress else None, + verbose=verbose, + case_timeout=effective_timeout, + ) + ) except Exception as e: click.secho(f"Error running benchmark: {e}", fg="red") raise SystemExit(1) from e diff --git a/lib/crewai/src/crewai/new_agent/executor.py b/lib/crewai/src/crewai/new_agent/executor.py index 70f3fe972..640d32e1b 100644 --- a/lib/crewai/src/crewai/new_agent/executor.py +++ b/lib/crewai/src/crewai/new_agent/executor.py @@ -2109,8 +2109,6 @@ class ConversationalAgentExecutor(BaseModel): if self.conversation_history else "", ) - import asyncio - loop = asyncio.get_event_loop() if loop.is_running(): asyncio.ensure_future(self.provider.send_message(hint_msg))