mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-01 13:18:10 +00:00
fix: resolve lint, test, and review issues
- Replace S101 assert guards with explicit if/raise RuntimeError in benchmark.py and cli.py (3 locations)
- Fix test_create_llm_from_env_with_unaccepted_attributes to use DEFAULT_LLM_MODEL with clear=True so the assertion isn't brittle against the hardcoded model name
- Add n_iterations loop to _test_new_agents (was unused, now mirrors _train_new_agents iteration pattern)
- Consolidate dotenv loading in cli.py and agent_tui.py to use the existing load_env_vars() from utils.py instead of duplicating logic

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,7 @@ from textual.widgets import (
|
||||
)
|
||||
|
||||
from crewai_cli.create_agent import _strip_jsonc
|
||||
from crewai_cli.utils import load_env_vars
|
||||
|
||||
|
||||
try:
|
||||
@@ -1703,18 +1704,9 @@ class AgentTUI(App[None]):
|
||||
|
||||
def _load_dotenv(base: Path) -> None:
|
||||
"""Load .env file into os.environ if it exists."""
|
||||
env_path = base / ".env"
|
||||
if not env_path.exists():
|
||||
return
|
||||
try:
|
||||
for line in env_path.read_text(encoding="utf-8").splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
key, _, value = line.partition("=")
|
||||
key = key.strip()
|
||||
value = value.strip()
|
||||
if key and value and key not in os.environ:
|
||||
for key, value in load_env_vars(base).items():
|
||||
if key not in os.environ:
|
||||
os.environ[key] = value
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -136,7 +136,8 @@ async def _judge_with_llm(
|
||||
from crewai.utilities.llm_utils import create_llm
|
||||
|
||||
judge_llm = create_llm(judge_model)
|
||||
assert judge_llm is not None
|
||||
if judge_llm is None:
|
||||
raise RuntimeError(f"Failed to create LLM from judge model: {judge_model!r}")
|
||||
|
||||
prompt = (
|
||||
"You are an evaluation judge. Score the following response on a scale of 0.0 to 1.0.\n\n"
|
||||
@@ -445,7 +446,7 @@ async def run_benchmark(
|
||||
class SuppressBenchmarkOutput:
|
||||
"""Context manager that silences console formatter and noisy logging during benchmarks."""
|
||||
|
||||
def __enter__(self) -> "SuppressBenchmarkOutput":
|
||||
def __enter__(self) -> SuppressBenchmarkOutput:
|
||||
import logging
|
||||
|
||||
self._saved_formatter = None
|
||||
@@ -491,7 +492,7 @@ class SuppressBenchmarkOutput:
|
||||
class VerboseBenchmarkOutput:
|
||||
"""Context manager that subscribes to NewAgent events and prints them for debugging."""
|
||||
|
||||
def __enter__(self) -> "VerboseBenchmarkOutput":
|
||||
def __enter__(self) -> VerboseBenchmarkOutput:
|
||||
import logging
|
||||
import sys
|
||||
|
||||
@@ -620,7 +621,7 @@ class ArtifactsSandbox:
|
||||
self._base = Path(base)
|
||||
self._prev_cwd: str | None = None
|
||||
|
||||
def __enter__(self) -> "ArtifactsSandbox":
|
||||
def __enter__(self) -> ArtifactsSandbox:
|
||||
import os
|
||||
|
||||
self._base.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -37,7 +37,11 @@ from crewai_cli.user_data import (
|
||||
is_tracing_enabled,
|
||||
update_user_data,
|
||||
)
|
||||
from crewai_cli.utils import build_env_with_all_tool_credentials, read_toml
|
||||
from crewai_cli.utils import (
|
||||
build_env_with_all_tool_credentials,
|
||||
load_env_vars,
|
||||
read_toml,
|
||||
)
|
||||
|
||||
|
||||
def _get_cli_version() -> str:
|
||||
@@ -59,19 +63,12 @@ def crewai() -> None:
|
||||
"""Top-level command group for crewai."""
|
||||
from pathlib import Path
|
||||
|
||||
env_path = Path.cwd() / ".env"
|
||||
if env_path.exists():
|
||||
try:
|
||||
for line in env_path.read_text(encoding="utf-8").splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
key, _, value = line.partition("=")
|
||||
key, value = key.strip(), value.strip()
|
||||
if key and value and key not in os.environ:
|
||||
os.environ[key] = value
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
for key, value in load_env_vars(Path.cwd()).items():
|
||||
if key not in os.environ:
|
||||
os.environ[key] = value
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@crewai.command(
|
||||
@@ -847,7 +844,7 @@ def _test_new_agents(
|
||||
case_count = sum(len(j["cases"]) for j in jobs)
|
||||
click.echo()
|
||||
click.secho(
|
||||
f"Testing {len(jobs)} agent(s), {case_count} cases (threshold={threshold})",
|
||||
f"Testing {len(jobs)} agent(s), {case_count} cases, {n_iterations} iteration(s) (threshold={threshold})",
|
||||
fg="cyan",
|
||||
bold=True,
|
||||
)
|
||||
@@ -858,51 +855,59 @@ def _test_new_agents(
|
||||
VerboseBenchmarkOutput,
|
||||
)
|
||||
|
||||
if not verbose:
|
||||
assert progress is not None
|
||||
progress.start()
|
||||
try:
|
||||
with ArtifactsSandbox():
|
||||
if verbose:
|
||||
with VerboseBenchmarkOutput():
|
||||
all_results = asyncio.run(_run_all())
|
||||
else:
|
||||
with SuppressBenchmarkOutput():
|
||||
all_results = asyncio.run(_run_all())
|
||||
finally:
|
||||
if not verbose:
|
||||
assert progress is not None
|
||||
progress.stop()
|
||||
|
||||
# Evaluate results
|
||||
all_passed = True
|
||||
agents_tested = 0
|
||||
for job, result in zip(jobs, all_results):
|
||||
if isinstance(result, Exception):
|
||||
click.secho(
|
||||
f" Error running tests for {job['agent_name']}: {result}", fg="red"
|
||||
)
|
||||
all_passed = False
|
||||
continue
|
||||
|
||||
agents_tested += 1
|
||||
for results in result.values():
|
||||
failed = [r for r in results if r.score < job["threshold"]]
|
||||
if failed:
|
||||
for iteration in range(n_iterations):
|
||||
if n_iterations > 1:
|
||||
click.secho(f"\n Iteration {iteration + 1}/{n_iterations}", fg="cyan")
|
||||
|
||||
if not verbose:
|
||||
if progress is None:
|
||||
raise RuntimeError("progress must not be None in non-verbose mode")
|
||||
progress.start()
|
||||
try:
|
||||
with ArtifactsSandbox():
|
||||
if verbose:
|
||||
with VerboseBenchmarkOutput():
|
||||
all_results = asyncio.run(_run_all())
|
||||
else:
|
||||
with SuppressBenchmarkOutput():
|
||||
all_results = asyncio.run(_run_all())
|
||||
finally:
|
||||
if not verbose:
|
||||
if progress is None:
|
||||
raise RuntimeError("progress must not be None in non-verbose mode")
|
||||
progress.stop()
|
||||
|
||||
# Evaluate results for this iteration
|
||||
for job, result in zip(jobs, all_results):
|
||||
if isinstance(result, Exception):
|
||||
click.secho(
|
||||
f" Error running tests for {job['agent_name']}: {result}", fg="red"
|
||||
)
|
||||
all_passed = False
|
||||
_con.print(
|
||||
f" [red bold]{job['agent_name']}: FAILED {len(failed)}/{len(results)} "
|
||||
f"cases below threshold ({job['threshold']})[/red bold]"
|
||||
)
|
||||
for r in failed:
|
||||
inp = r.input[:60] + ("…" if len(r.input) > 60 else "")
|
||||
continue
|
||||
|
||||
agents_tested += 1
|
||||
for results in result.values():
|
||||
failed = [r for r in results if r.score < job["threshold"]]
|
||||
if failed:
|
||||
all_passed = False
|
||||
_con.print(
|
||||
f" [red]#{r.case_index + 1}[/red] [dim]{inp}[/dim] [red]{r.score:.2f}[/red]"
|
||||
f" [red bold]{job['agent_name']}: FAILED {len(failed)}/{len(results)} "
|
||||
f"cases below threshold ({job['threshold']})[/red bold]"
|
||||
)
|
||||
else:
|
||||
_con.print(
|
||||
f" [green bold]{job['agent_name']}: PASSED all {len(results)} cases >= {job['threshold']}[/green bold]"
|
||||
)
|
||||
for r in failed:
|
||||
inp = r.input[:60] + ("…" if len(r.input) > 60 else "")
|
||||
_con.print(
|
||||
f" [red]#{r.case_index + 1}[/red] [dim]{inp}[/dim] [red]{r.score:.2f}[/red]"
|
||||
)
|
||||
else:
|
||||
_con.print(
|
||||
f" [green bold]{job['agent_name']}: PASSED all {len(results)} cases >= {job['threshold']}[/green bold]"
|
||||
)
|
||||
|
||||
if agents_tested == 0:
|
||||
click.secho("No agents completed successfully.", fg="yellow")
|
||||
raise SystemExit(1)
|
||||
|
||||
@@ -77,16 +77,16 @@ def test_create_llm_from_env_with_unaccepted_attributes() -> None:
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
"OPENAI_MODEL_NAME": "gpt-3.5-turbo",
|
||||
"OPENAI_API_KEY": "fake-key",
|
||||
"AWS_ACCESS_KEY_ID": "fake-access-key",
|
||||
"AWS_SECRET_ACCESS_KEY": "fake-secret-key",
|
||||
"AWS_DEFAULT_REGION": "us-west-2",
|
||||
},
|
||||
clear=True,
|
||||
):
|
||||
llm = create_llm(llm_value=None)
|
||||
assert isinstance(llm, BaseLLM)
|
||||
assert llm.model == "gpt-3.5-turbo"
|
||||
assert llm.model == DEFAULT_LLM_MODEL
|
||||
assert not hasattr(llm, "AWS_ACCESS_KEY_ID")
|
||||
assert not hasattr(llm, "AWS_SECRET_ACCESS_KEY")
|
||||
assert not hasattr(llm, "AWS_DEFAULT_REGION")
|
||||
|
||||
Reference in New Issue
Block a user