diff --git a/src/crewai/cli/test_crew.py b/src/crewai/cli/test_crew.py
index b95669e55..9bbe7bf47 100644
--- a/src/crewai/cli/test_crew.py
+++ b/src/crewai/cli/test_crew.py
@@ -1,8 +1,8 @@
 import subprocess
 
-import click
-import pytest
-pytest.skip(allow_module_level=True)
+import click
+
+# pytest.skip(allow_module_level=True)
 
 
 def test_crew(n_iterations: int, model: str) -> None:
diff --git a/src/crewai/task.py b/src/crewai/task.py
index 14c1e30e5..3e693a498 100644
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -1,3 +1,4 @@
+import datetime
 import json
 import os
 import threading
@@ -107,6 +108,7 @@ class Task(BaseModel):
     _original_description: str | None = None
     _original_expected_output: str | None = None
     _thread: threading.Thread | None = None
+    _execution_time: float | None = None
 
     def __init__(__pydantic_self__, **data):
         config = data.pop("config", {})
@@ -120,6 +122,12 @@ class Task(BaseModel):
             "may_not_set_field", "This field is not to be set by the user.", {}
         )
 
+    def _set_start_execution_time(self) -> float:
+        return datetime.datetime.now().timestamp()
+
+    def _set_end_execution_time(self, start_time: float) -> None:
+        self._execution_time = datetime.datetime.now().timestamp() - start_time
+
     @field_validator("output_file")
     @classmethod
     def output_file_validation(cls, value: str) -> str:
@@ -216,6 +224,7 @@ class Task(BaseModel):
                 f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
             )
 
+        start_time = self._set_start_execution_time()
         self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)
 
         self.prompt_context = context
@@ -239,6 +248,7 @@ class Task(BaseModel):
         )
 
         self.output = task_output
+        self._set_end_execution_time(start_time)
 
         if self.callback:
             self.callback(self.output)
@@ -250,7 +260,9 @@ class Task(BaseModel):
             content = (
                 json_output
                 if json_output
-                else pydantic_output.model_dump_json() if pydantic_output else result
+                else pydantic_output.model_dump_json()
+                if pydantic_output
+                else result
             )
             self._save_file(content)
 
diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
index 3f1abb8b8..c740f1f3b 100644
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -1,4 +1,5 @@
 from collections import defaultdict
+from datetime import datetime
 
 from langchain_openai import ChatOpenAI
 from pydantic import BaseModel, Field
@@ -28,7 +29,9 @@ class CrewEvaluator:
     """
 
     tasks_scores: defaultdict = defaultdict(list)
+    run_execution_times: defaultdict = defaultdict(list)
     iteration: int = 0
+    execution_time_elapsed_list = []
 
     def __init__(self, crew, openai_model_name: str):
         self.crew = crew
@@ -40,9 +43,6 @@ class CrewEvaluator:
         for task in self.crew.tasks:
             task.callback = self.evaluate
 
-    def set_iteration(self, iteration: int) -> None:
-        self.iteration = iteration
-
     def _evaluator_agent(self):
         return Agent(
             role="Task Execution Evaluator",
@@ -71,6 +71,21 @@ class CrewEvaluator:
             output_pydantic=TaskEvaluationPydanticOutput,
         )
 
+    def set_iteration(self, iteration: int) -> None:
+        self.iteration = iteration
+
+    def set_start_time(self) -> None:
+        """Sets the start time for the evaluation process."""
+        self._start_time = datetime.now().timestamp()
+
+    def set_end_time(self) -> None:
+        """Sets the end time for the evaluation process."""
+        self._end_time = datetime.now().timestamp()
+
+    def compute_execution_time(self) -> None:
+        """Calculates the execution time for the evaluation process."""
+        self.execution_time_elapsed_list.append(self._end_time - self._start_time)
+
     def print_crew_evaluation_result(self) -> None:
         """
         Prints the evaluation result of the crew in a table.
@@ -91,6 +106,9 @@ class CrewEvaluator:
             sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values())
         ]
         crew_average = sum(task_averages) / len(task_averages)
+        # execution_time_avg = sum(self.execution_time_elapsed_list) / len(
+        #     self.execution_time_elapsed_list
+        # )
 
         # Create a table
         table = Table(title="Tasks Scores \n (1-10 Higher is better)")
@@ -119,6 +137,16 @@ class CrewEvaluator:
         ]
         table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
 
+        run_exec_times = [
+            int(sum(tasks_exec_times))
+            for _, tasks_exec_times in self.run_execution_times.items()
+        ]
+        execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
+        table.add_row(
+            "Execution Time (s)",
+            *map(str, run_exec_times),
+            f"{execution_time_avg}",
+        )
         # Display the table in the terminal
         console = Console()
         console.print(table)
@@ -145,5 +173,8 @@ class CrewEvaluator:
 
         if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
             self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
+            self.run_execution_times[self.iteration].append(
+                current_task._execution_time
+            )
         else:
             raise ValueError("Evaluation result is not in the expected format")
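
Reviewer note on how the pieces fit together: the task-execution path in task.py now stamps a start time, runs the task, and stores the elapsed seconds in Task._execution_time; the evaluation callback in CrewEvaluator buckets each task's duration into run_execution_times[self.iteration], and print_crew_evaluation_result sums each iteration's durations and averages those totals for the new "Execution Time (s)" row. The set_start_time/set_end_time/compute_execution_time helpers and execution_time_elapsed_list are added but not yet wired into the printed table (their average remains commented out). Below is a minimal standalone sketch of the per-task timing and aggregation; DummyTask, its execute method, and the sleep are illustrative stand-ins, not crewAI APIs.

# Standalone sketch of the timing/aggregation logic in this patch.
# DummyTask and the sleep are illustrative stand-ins, not crewAI code.
import time
from collections import defaultdict


class DummyTask:
    """Mimics the patched Task: records elapsed seconds in _execution_time."""

    _execution_time: float | None = None

    def execute(self) -> None:
        start = time.time()       # analogous to _set_start_execution_time()
        time.sleep(0.01)          # placeholder for the real task run
        self._execution_time = time.time() - start  # _set_end_execution_time()


run_execution_times: defaultdict = defaultdict(list)

# Two evaluation iterations, two tasks per run.
for iteration in (1, 2):
    for _ in range(2):
        task = DummyTask()
        task.execute()
        # the evaluation callback appends each duration under the current iteration
        run_execution_times[iteration].append(task._execution_time)

# As in print_crew_evaluation_result(): total seconds per run, then the average.
run_exec_times = [
    int(sum(tasks_exec_times))
    for _, tasks_exec_times in run_execution_times.items()
]
execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
print("Execution Time (s):", run_exec_times, "avg:", execution_time_avg)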