From 37cfbe738999cb52db0625c976112e1f36a695f0 Mon Sep 17 00:00:00 2001
From: Lucas Gomide
Date: Wed, 9 Jul 2025 17:25:08 -0300
Subject: [PATCH] fix: do not evaluate Agent by default

This is an experimental feature; we still need to refine it further.
---
 src/crewai/crew.py                          | 17 +++++++++++++++++
 src/crewai/evaluation/agent_evaluator.py    | 10 +++++-----
 src/crewai/evaluation/evaluation_display.py |  8 +-------
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index e1b7dc04f..89acee505 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -1313,6 +1313,7 @@ class Crew(FlowTrackable, BaseModel):
         n_iterations: int,
         eval_llm: Union[str, InstanceOf[BaseLLM]],
         inputs: Optional[Dict[str, Any]] = None,
+        include_agent_eval: Optional[bool] = False
     ) -> None:
         """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
         try:
@@ -1331,13 +1332,29 @@ class Crew(FlowTrackable, BaseModel):
                 ),
             )
             test_crew = self.copy()
+
+            # TODO: Refactor to use a single Evaluator Manager class
             evaluator = CrewEvaluator(test_crew, llm_instance)
+            if include_agent_eval:
+                from crewai.evaluation import create_default_evaluator
+                agent_evaluator = create_default_evaluator(crew=test_crew)
+

             for i in range(1, n_iterations + 1):
                 evaluator.set_iteration(i)
+
+                if include_agent_eval:
+                    agent_evaluator.set_iteration(i)
+
                 test_crew.kickoff(inputs=inputs)

+                # TODO: Refactor to use ListenerEvents instead of triggering each iteration manually
+                if include_agent_eval:
+                    agent_evaluator.evaluate_current_iteration()
+
             evaluator.print_crew_evaluation_result()
+            if include_agent_eval:
+                agent_evaluator.get_agent_evaluation(include_evaluation_feedback=True)

             crewai_event_bus.emit(
                 self,
diff --git a/src/crewai/evaluation/agent_evaluator.py b/src/crewai/evaluation/agent_evaluator.py
index 3627571b0..fa97421fb 100644
--- a/src/crewai/evaluation/agent_evaluator.py
+++ b/src/crewai/evaluation/agent_evaluator.py
@@ -45,8 +45,6 @@ class AgentEvaluator:
             raise ValueError("Cannot evaluate: no callback was set. Use set_callback() method first.")

         from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
-        self.console_formatter.print(f"\n[bold blue]📊 Running agent evaluations for iteration {self.iteration}...[/bold blue]\n")
-
         evaluation_results = defaultdict(list)
         total_evals = 0

@@ -101,7 +99,7 @@ class AgentEvaluator:
     def display_results_with_iterations(self):
         self.display_formatter.display_summary_results(self.iterations_results)

-    def get_agent_evaluation(self, strategy: AggregationStrategy = AggregationStrategy.SIMPLE_AVERAGE):
+    def get_agent_evaluation(self, strategy: AggregationStrategy = AggregationStrategy.SIMPLE_AVERAGE, include_evaluation_feedback: bool = False):
         agent_results = {}
         with crewai_event_bus.scoped_handlers():
             task_results = self.get_evaluation_results()
@@ -120,9 +118,11 @@

             agent_results[agent_role] = aggregated_result

-        if len(self.iterations_results) > 1 and self.iteration == max(self.iterations_results.keys()):
+
+        if self.iteration == max(self.iterations_results.keys()):
             self.display_results_with_iterations()
-        elif agent_results:
+
+        if include_evaluation_feedback:
             self.display_evaluation_results(agent_results)

         return agent_results
diff --git a/src/crewai/evaluation/evaluation_display.py b/src/crewai/evaluation/evaluation_display.py
index 0b2c21e3f..9be4070db 100644
--- a/src/crewai/evaluation/evaluation_display.py
+++ b/src/crewai/evaluation/evaluation_display.py
@@ -19,7 +19,7 @@ class EvaluationDisplayFormatter:
         for agent_role, result in agent_results.items():
             self.console_formatter.print(f"\n[bold cyan]Agent: {agent_role}[/bold cyan]\n")

-            table = Table(title=f"{agent_role} Evaluation Results", box=ROUNDED)
+            table = Table(box=ROUNDED)
             table.add_column("Metric", style="cyan")
             table.add_column("Score (1-10)", justify="center")
             table.add_column("Feedback", style="green")
@@ -70,12 +70,6 @@
             self.console_formatter.print("[yellow]No evaluation results to display[/yellow]")
             return

-        title = Panel(
-            "[bold]Agent Evaluation Summary[/bold]",
-            style="blue",
-            box=ROUNDED
-        )
-        self.console_formatter.print(title, justify="center")
         self.console_formatter.print("\n")

         table = Table(title="Agent Performance Scores \n (1-10 Higher is better)", box=HEAVY_EDGE)
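
Usage note (not part of the patch): a minimal sketch of how the new opt-in flag is exercised. The crew setup, model name, and inputs below are illustrative assumptions, not taken from this diff; only the test(..., include_agent_eval=...) signature comes from the patch.

    from crewai import Agent, Crew, Task

    # Illustrative single-agent crew; any existing Crew instance works the same way.
    researcher = Agent(
        role="Researcher",
        goal="Summarize a topic concisely",
        backstory="An analyst who writes short, factual summaries.",
    )
    task = Task(
        description="Summarize the topic: {topic}",
        expected_output="A three-sentence summary.",
        agent=researcher,
    )
    crew = Crew(agents=[researcher], tasks=[task])

    # Default behavior after this patch: only the crew-level CrewEvaluator runs.
    crew.test(n_iterations=2, eval_llm="gpt-4o-mini", inputs={"topic": "LLM evaluation"})

    # Agent-level evaluation is now opt-in via the experimental flag.
    crew.test(
        n_iterations=2,
        eval_llm="gpt-4o-mini",
        inputs={"topic": "LLM evaluation"},
        include_agent_eval=True,
    )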