fix: address remaining PR review comments — null guard, markup escaping, empty criteria

- Add None check after _load_agent() in benchmark runner (the loader can return None on circular refs)
- Escape user-sourced content in Rich markup via _safe_render() in memory panel and skills list
- Default to passed=True when benchmark case has neither expected nor criteria

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-07-02 05:38:12 +00:00 · 2026-05-14 14:04:40 -04:00
parent 16488f5fe5
commit 2eb7e15f89
2 changed files with 29 additions and 4 deletions
--- a/lib/cli/src/crewai_cli/agent_tui.py
+++ b/lib/cli/src/crewai_cli/agent_tui.py
@@ -1120,7 +1120,7 @@ class AgentTUI(App[None]):

        lines = [f"[bold]Active Skills[/] ({len(active)})"]
        for s in active:
-            lines.append(f"  [{_CORAL}]{s.name}[/] — {s.description}")
+            lines.append(f"  [{_CORAL}]{_safe_render(s.name)}[/] — {_safe_render(s.description)}")
        self._mount_sys("\n".join(lines))

    def _handle_tasks_command(self, parts: list[str]) -> None:
@@ -1219,13 +1219,13 @@ class AgentTUI(App[None]):
            if mem_type == "canonical"
            else f"[dim]{mem_type}[/]"
        )
-        importance_tag = f" [yellow]★{importance}[/]" if importance else ""
-        scope_tag = f" [{_DIM}]scope:{scope}[/]" if scope else ""
+        importance_tag = f" [yellow]★{_safe_render(str(importance))}[/]" if importance else ""
+        scope_tag = f" [{_DIM}]scope:{_safe_render(str(scope))}[/]" if scope else ""
        time_tag = f" [{_DIM}]{timestamp}[/]" if timestamp else ""

        return [
            f"  {i}. {type_tag}{importance_tag}{scope_tag}{time_tag}",
-            f"     {content}",
+            f"     {_safe_render(content)}",
            "",
        ]

--- a/lib/cli/src/crewai_cli/benchmark.py
+++ b/lib/cli/src/crewai_cli/benchmark.py
@@ -304,6 +304,29 @@ async def _run_model_benchmark(
                    score=0.0,
                )

+            if agent is None:
+                emit(
+                    {
+                        "type": "case_done",
+                        "model": model,
+                        "case_index": i,
+                        "total_cases": total,
+                        "passed": False,
+                        "score": 0.0,
+                        "time_ms": 0,
+                        "error": "agent loader returned None",
+                    }
+                )
+                return BenchmarkResult(
+                    case_index=i,
+                    input=case.input,
+                    expected=case.expected,
+                    actual="[Agent loader returned None]",
+                    model=model,
+                    passed=False,
+                    score=0.0,
+                )
+
            start_ms = _current_time_ms()
            try:
                response = await asyncio.wait_for(
@@ -365,6 +388,8 @@ async def _run_model_benchmark(
                )

            passed, score = False, 0.0
+            if case.expected is None and case.criteria is None:
+                passed, score = True, 1.0
            if case.expected is not None:
                passed, score = _check_expected(case.expected, actual)
            if case.criteria is not None: