mirror of https://github.com/crewAIInc/crewAI.git
fix: render all feedback per iteration
@@ -123,12 +123,12 @@ class AgentEvaluator:
         self.display_results_with_iterations()

         if include_evaluation_feedback:
-            self.display_evaluation_results(agent_results)
+            self.display_evaluation_with_feedback()

         return agent_results

-    def display_evaluation_results(self, agent_results: Dict[str, AgentAggregatedEvaluationResult]):
-        self.display_formatter.display_evaluation_results(agent_results)
+    def display_evaluation_with_feedback(self):
+        self.display_formatter.display_evaluation_with_feedback(self.iterations_results)

     def evaluate(
         self,
@@ -1,7 +1,6 @@
 from typing import Dict, Any, List
 from rich.table import Table
 from rich.box import HEAVY_EDGE, ROUNDED
-from rich.panel import Panel
 from crewai.evaluation.base_evaluator import AgentAggregatedEvaluationResult, AggregationStrategy
 from crewai.evaluation import EvaluationScore
 from crewai.utilities.events.utils.console_formatter import ConsoleFormatter
@@ -11,59 +10,87 @@ class EvaluationDisplayFormatter:
     def __init__(self):
         self.console_formatter = ConsoleFormatter()

-    def display_evaluation_results(self, agent_results: Dict[str, AgentAggregatedEvaluationResult]):
-        if not agent_results:
+    def display_evaluation_with_feedback(self, iterations_results: Dict[int, Dict[str, List[Any]]]):
+        if not iterations_results:
             self.console_formatter.print("[yellow]No evaluation results to display[/yellow]")
             return

-        for agent_role, result in agent_results.items():
-            self.console_formatter.print(f"\n[bold cyan]Agent: {agent_role}[/bold cyan]\n")
-
-            table = Table(box=ROUNDED)
-            table.add_column("Metric", style="cyan")
-            table.add_column("Score (1-10)", justify="center")
-            table.add_column("Feedback", style="green")
-
-            for metric, evaluation_score in result.metrics.items():
-                score = evaluation_score.score if evaluation_score.score is not None else "N/A"
-
-                if isinstance(score, (int, float)) and score is not None:
-                    if score >= 8.0:
-                        score_text = f"[green]{score:.1f}[/green]"
-                    elif score >= 6.0:
-                        score_text = f"[cyan]{score:.1f}[/cyan]"
-                    elif score >= 4.0:
-                        score_text = f"[yellow]{score:.1f}[/yellow]"
-                    else:
-                        score_text = f"[red]{score:.1f}[/red]"
-                else:
-                    score_text = "[dim]N/A[/dim]"
-
-                table.add_section()
-                table.add_row(
-                    metric.title(),
-                    score_text,
-                    evaluation_score.feedback or ""
-                )
-
-            if result.overall_score is not None:
-                if result.overall_score >= 8.0:
-                    color = "green"
-                elif result.overall_score >= 6.0:
-                    color = "cyan"
-                elif result.overall_score >= 4.0:
-                    color = "yellow"
-                else:
-                    color = "red"
-
-                table.add_section()
-                table.add_row(
-                    "[bold]Overall Score[/bold]",
-                    f"[bold {color}]{result.overall_score:.1f}[/bold {color}]",
-                    ""
-                )
-
-            self.console_formatter.print(table)
+        # Get all agent roles across all iterations
+        all_agent_roles = set()
+        for iter_results in iterations_results.values():
+            all_agent_roles.update(iter_results.keys())
+
+        for agent_role in sorted(all_agent_roles):
+            self.console_formatter.print(f"\n[bold cyan]Agent: {agent_role}[/bold cyan]")
+
+            # Process each iteration
+            for iter_num, results in sorted(iterations_results.items()):
+                if agent_role not in results or not results[agent_role]:
+                    continue
+
+                agent_results = results[agent_role]
+                agent_id = agent_results[0].agent_id
+
+                # Aggregate results for this agent in this iteration
+                aggregated_result = self._aggregate_agent_results(
+                    agent_id=agent_id,
+                    agent_role=agent_role,
+                    results=agent_results,
+                )
+
+                # Display iteration header
+                self.console_formatter.print(f"\n[bold]Iteration {iter_num}[/bold]")
+
+                # Create table for this iteration
+                table = Table(box=ROUNDED)
+                table.add_column("Metric", style="cyan")
+                table.add_column("Score (1-10)", justify="center")
+                table.add_column("Feedback", style="green")
+
+                # Add metrics to table
+                if aggregated_result.metrics:
+                    for metric, evaluation_score in aggregated_result.metrics.items():
+                        score = evaluation_score.score if evaluation_score.score is not None else "N/A"
+
+                        if isinstance(score, (int, float)) and score is not None:
+                            if score >= 8.0:
+                                score_text = f"[green]{score:.1f}[/green]"
+                            elif score >= 6.0:
+                                score_text = f"[cyan]{score:.1f}[/cyan]"
+                            elif score >= 4.0:
+                                score_text = f"[yellow]{score:.1f}[/yellow]"
+                            else:
+                                score_text = f"[red]{score:.1f}[/red]"
+                        else:
+                            score_text = "[dim]N/A[/dim]"
+
+                        table.add_section()
+                        table.add_row(
+                            metric.title(),
+                            score_text,
+                            evaluation_score.feedback or ""
+                        )
+
+                if aggregated_result.overall_score is not None:
+                    overall_score = aggregated_result.overall_score
+                    if overall_score >= 8.0:
+                        overall_color = "green"
+                    elif overall_score >= 6.0:
+                        overall_color = "cyan"
+                    elif overall_score >= 4.0:
+                        overall_color = "yellow"
+                    else:
+                        overall_color = "red"
+
+                    table.add_section()
+                    table.add_row(
+                        "Overall Score",
+                        f"[{overall_color}]{overall_score:.1f}[/]",
+                        "Overall agent evaluation score"
+                    )
+
+                # Print the table for this iteration
+                self.console_formatter.print(table)

     def display_summary_results(self, iterations_results: Dict[int, Dict[str, List[AgentAggregatedEvaluationResult]]]):
         if not iterations_results:
@@ -155,6 +155,7 @@ class CrewEvaluator:
         )

         console = Console()
+        console.print("\n")
         console.print(table)

     def evaluate(self, task_output: TaskOutput):
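For context on how the reworked formatter is driven: display_evaluation_with_feedback now takes results keyed by iteration number and then by agent role (Dict[int, Dict[str, List[Any]]]) and renders one table per agent per iteration. Below is a minimal, self-contained sketch of that per-iteration rendering pattern using rich directly; the iterations_results sample data and the plain-dict result entries are hypothetical stand-ins for crewAI's evaluation result objects, not the library's own API.

# Sketch only: illustrates the iteration -> agent role -> results grouping
# that the new formatter iterates over. Sample data is made up.
from rich.box import ROUNDED
from rich.console import Console
from rich.table import Table

iterations_results = {
    1: {"Researcher": [{"metric": "goal_alignment", "score": 8.5, "feedback": "Stayed on task"}]},
    2: {"Researcher": [{"metric": "goal_alignment", "score": 6.0, "feedback": "Partially off-topic"}]},
}

console = Console()
# Collect every agent role seen in any iteration, as the new method does.
all_agent_roles = {role for results in iterations_results.values() for role in results}

for agent_role in sorted(all_agent_roles):
    console.print(f"\n[bold cyan]Agent: {agent_role}[/bold cyan]")
    for iter_num, results in sorted(iterations_results.items()):
        if agent_role not in results or not results[agent_role]:
            continue
        # One header and one table per iteration, so feedback from every
        # iteration is rendered rather than only the aggregated view.
        console.print(f"\n[bold]Iteration {iter_num}[/bold]")
        table = Table(box=ROUNDED)
        table.add_column("Metric", style="cyan")
        table.add_column("Score (1-10)", justify="center")
        table.add_column("Feedback", style="green")
        for entry in results[agent_role]:
            table.add_row(entry["metric"].title(), f"{entry['score']:.1f}", entry["feedback"])
        console.print(table)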