feat: Add execution time to both task and testing feature (#1031)

* feat: Add execution time to both task and testing feature

* feat: Remove unused functions

* feat: change test_crew to evaluate_crew to avoid issues with testing libs

* feat: fix tests
Author: Eduardo Chiarotti
Date: 2024-07-29 23:17:07 -03:00
Committed by: GitHub
Parent: de6b597eff
Commit: d824db82a3
7 changed files with 62 additions and 33 deletions

@@ -28,6 +28,7 @@ class CrewEvaluator:
     """

     tasks_scores: defaultdict = defaultdict(list)
+    run_execution_times: defaultdict = defaultdict(list)
     iteration: int = 0

     def __init__(self, crew, openai_model_name: str):
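
Both class attributes act as per-run accumulators: the evaluate callback (see the last hunk below) appends one value per task under the current iteration key set by set_iteration. A minimal illustration of that defaultdict(list) behavior, using made-up numbers:

from collections import defaultdict

tasks_scores: defaultdict = defaultdict(list)
run_execution_times: defaultdict = defaultdict(list)

# During run (iteration) 1 the callback appends one entry per evaluated task.
tasks_scores[1].append(9)            # quality score for the first task
run_execution_times[1].append(2.4)   # execution time in seconds for the first task

print(dict(tasks_scores))            # {1: [9]}
print(dict(run_execution_times))     # {1: [2.4]}
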
@@ -40,9 +41,6 @@ class CrewEvaluator:
         for task in self.crew.tasks:
             task.callback = self.evaluate

-    def set_iteration(self, iteration: int) -> None:
-        self.iteration = iteration
-
     def _evaluator_agent(self):
         return Agent(
             role="Task Execution Evaluator",
@@ -71,6 +69,9 @@ class CrewEvaluator:
             output_pydantic=TaskEvaluationPydanticOutput,
         )

+    def set_iteration(self, iteration: int) -> None:
+        self.iteration = iteration
+
     def print_crew_evaluation_result(self) -> None:
         """
         Prints the evaluation result of the crew in a table.
@@ -119,6 +120,16 @@ class CrewEvaluator:
         ]
         table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
+        run_exec_times = [
+            int(sum(tasks_exec_times))
+            for _, tasks_exec_times in self.run_execution_times.items()
+        ]
+        execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
+        table.add_row(
+            "Execution Time (s)",
+            *map(str, run_exec_times),
+            f"{execution_time_avg}",
+        )

         # Display the table in the terminal
         console = Console()
         console.print(table)
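
To make the aggregation concrete, here is a small worked example of the math above, with hypothetical timings for two runs of a two-task crew: per-run task times are summed and truncated to whole seconds, then averaged across runs.

from collections import defaultdict

# Hypothetical per-task execution times (seconds), keyed by run number.
run_execution_times = defaultdict(list, {1: [2.4, 3.1], 2: [1.9, 2.8]})

run_exec_times = [
    int(sum(tasks_exec_times))
    for _, tasks_exec_times in run_execution_times.items()
]
print(run_exec_times)  # [5, 4]  (5.5 s and 4.7 s truncated to int)

execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
print(execution_time_avg)  # 4  (average of 4.5 truncated to int)
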
@@ -145,5 +156,8 @@ class CrewEvaluator:
         if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
             self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
+            self.run_execution_times[self.iteration].append(
+                current_task._execution_time
+            )
         else:
             raise ValueError("Evaluation result is not in the expected format")
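
For context, current_task._execution_time is read from a private attribute that the task is assumed to populate while it runs; the attribute name comes from the diff, but the timing logic below is only an illustrative sketch, not the actual crewAI implementation.

import time

class TaskWithTiming:
    """Hypothetical stand-in showing how a task could record its own execution time."""

    def __init__(self) -> None:
        self._execution_time: float = 0.0

    def execute(self) -> None:
        start = time.time()
        # ... the task's real work would run here ...
        self._execution_time = time.time() - start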