mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-08 15:48:29 +00:00
* feat: add exchanged messages in LLMCallCompletedEvent * feat: add GoalAlignment metric for Agent evaluation * feat: add SemanticQuality metric for Agent evaluation * feat: add Tool Metrics for Agent evaluation * feat: add Reasoning Metrics for Agent evaluation, still in progress * feat: add AgentEvaluator class This class will evaluate Agent' results and report to user * fix: do not evaluate Agent by default This is a experimental feature we still need refine it further * test: add Agent eval tests * fix: render all feedback per iteration * style: resolve linter issues * style: fix mypy issues * fix: allow messages be empty on LLMCallCompletedEvent * feat: add Experiment evaluation framework with baseline comparison * fix: reset evaluator for each experiement iteraction * fix: fix track of new test cases * chore: split Experimental evaluation classes * refactor: remove unused method * refactor: isolate Console print in a dedicated class * fix: make crew required to run an experiment * fix: use time-aware to define experiment result * test: add tests for Evaluator Experiment * style: fix linter issues * fix: encode string before hashing * style: resolve linter issues * feat: add experimental folder for beta features (#3141) * test: move tests to experimental folder
71 lines
2.9 KiB
Python
71 lines
2.9 KiB
Python
from typing import Dict, Any
|
||
from rich.console import Console
|
||
from rich.table import Table
|
||
from rich.panel import Panel
|
||
from crewai.experimental.evaluation.experiment.result import ExperimentResults
|
||
|
||
class ExperimentResultsDisplay:
    """Render experiment results and baseline comparisons as Rich tables."""

    def __init__(self):
        """Create the display with its own Rich ``Console`` for output."""
        self.console = Console()

    def summary(self, experiment_results: "ExperimentResults") -> None:
        """Print a table with total, passed, failed and success-rate figures.

        Args:
            experiment_results: Object whose ``results`` attribute is a sized
                iterable of items exposing a truthy/falsy ``passed`` attribute.
        """
        total = len(experiment_results.results)
        passed = sum(1 for r in experiment_results.results if r.passed)

        table = Table(title="Experiment Summary")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Total Test Cases", str(total))
        table.add_row("Passed", str(passed))
        table.add_row("Failed", str(total - passed))
        # Guard against division by zero when the experiment has no results.
        table.add_row(
            "Success Rate",
            f"{(passed / total * 100):.1f}%" if total > 0 else "N/A",
        )

        self.console.print(table)

    @staticmethod
    def _format_details(identifiers: list, limit: int = 3) -> str:
        """Return a comma-separated preview of at most *limit* identifiers.

        Every identifier is converted with ``str`` so non-string identifiers
        are rendered instead of raising ``TypeError`` in ``str.join``. When
        more than *limit* identifiers exist, `` and N more`` is appended.
        An empty sequence yields an empty string.
        """
        preview = ", ".join(str(identifier) for identifier in identifiers[:limit])
        remaining = len(identifiers) - limit
        if remaining > 0:
            preview += f" and {remaining} more"
        return preview

    def comparison_summary(self, comparison: Dict[str, Any], baseline_timestamp: str) -> None:
        """Print a panel and table comparing the current run with a baseline.

        Args:
            comparison: Mapping that may contain the keys ``improved``,
                ``regressed``, ``unchanged``, ``new_tests`` and
                ``missing_tests``, each holding a list of test identifiers.
                Missing keys (or ``None`` values) are treated as empty lists.
            baseline_timestamp: Text identifying the baseline run; it is
                interpolated verbatim into the panel heading.
        """
        self.console.print(
            Panel(
                f"[bold]Comparison with baseline run from {baseline_timestamp}[/bold]",
                expand=False,
            )
        )

        table = Table(title="Results Comparison")
        table.add_column("Metric", style="cyan")
        table.add_column("Count", style="white")
        table.add_column("Details", style="dim")

        # `or []` also covers a key that is present but explicitly None.
        improved = comparison.get("improved") or []
        table.add_row(
            "✅ Improved",
            str(len(improved)),
            self._format_details(improved),
        )

        regressed = comparison.get("regressed") or []
        # The row is highlighted in red only when something actually regressed.
        table.add_row(
            "❌ Regressed",
            str(len(regressed)),
            self._format_details(regressed),
            style="red" if regressed else None,
        )

        unchanged = comparison.get("unchanged") or []
        table.add_row("⏺ Unchanged", str(len(unchanged)), "")

        # New and missing tests are optional rows: shown only when non-empty.
        new_tests = comparison.get("new_tests") or []
        if new_tests:
            table.add_row(
                "➕ New Tests",
                str(len(new_tests)),
                self._format_details(new_tests),
            )

        missing_tests = comparison.get("missing_tests") or []
        if missing_tests:
            table.add_row(
                "➖ Missing Tests",
                str(len(missing_tests)),
                self._format_details(missing_tests),
            )

        self.console.print(table)
|