feat: Add execution time to both task and testing feature

@@ -1,8 +1,8 @@
 import subprocess
-import click
-import pytest
 
-pytest.skip(allow_module_level=True)
+import click
 
+# pytest.skip(allow_module_level=True)
 
+
 def test_crew(n_iterations: int, model: str) -> None:

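This first hunk appears to touch the CLI's crew-test entry point. Because the module defines a function named `test_crew`, pytest will normally collect and execute it as if it were a test; the module-level `pytest.skip(allow_module_level=True)` existed to short-circuit that collection. With the skip commented out and `import pytest` dropped, collection presumably has to be excluded some other way. A hedged sketch of one conventional alternative, in a `conftest.py` (hypothetical, not part of this commit):

    # conftest.py -- hypothetical alternative to the module-level skip
    # tell pytest not to collect CLI modules that happen to match test_*.py
    collect_ignore_glob = ["*/cli/*"]
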
@@ -1,3 +1,4 @@
+import datetime
 import json
 import os
 import threading

@@ -107,6 +108,7 @@ class Task(BaseModel):
     _original_description: str | None = None
     _original_expected_output: str | None = None
     _thread: threading.Thread | None = None
+    _execution_time: float | None = None
 
     def __init__(__pydantic_self__, **data):
         config = data.pop("config", {})

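Like the neighboring `_original_*` and `_thread` attributes, the leading underscore makes `_execution_time` a Pydantic (v2) private attribute: it is not a model field, so it is excluded from validation and serialization and can be set freely after construction. A minimal sketch of that behavior (the `M` class is illustrative only):

    from pydantic import BaseModel

    class M(BaseModel):
        x: int
        _elapsed: float | None = None  # underscore prefix -> private attribute

    m = M(x=1)
    m._elapsed = 2.5           # settable after construction
    print(m.model_dump())      # {'x': 1} -- private attrs are never serialized
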
@@ -120,6 +122,12 @@ class Task(BaseModel):
                 "may_not_set_field", "This field is not to be set by the user.", {}
             )
 
+    def _set_start_execution_time(self) -> float:
+        return datetime.datetime.now().timestamp()
+
+    def _set_end_execution_time(self, start_time: float) -> None:
+        self._execution_time = datetime.datetime.now().timestamp() - start_time
+
     @field_validator("output_file")
     @classmethod
     def output_file_validation(cls, value: str) -> str:

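The two helpers bracket a task run and store the elapsed seconds in `_execution_time`. One design note: `datetime.datetime.now().timestamp()` reads the wall clock, which can jump under system clock adjustments, skewing measured durations; for pure durations, `time.monotonic()` is the conventional tool. A hedged sketch of equivalent helpers on a monotonic clock (an alternative, not what the commit ships):

    import time

    def set_start_execution_time() -> float:
        # monotonic clock only moves forward; immune to wall-clock jumps
        return time.monotonic()

    def compute_elapsed(start_time: float) -> float:
        # elapsed seconds since start_time
        return time.monotonic() - start_time
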
@@ -216,6 +224,7 @@ class Task(BaseModel):
                 f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
             )
 
+        start_time = self._set_start_execution_time()
         self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)
 
         self.prompt_context = context

@@ -239,6 +248,7 @@ class Task(BaseModel):
         )
         self.output = task_output
 
+        self._set_end_execution_time(start_time)
         if self.callback:
             self.callback(self.output)
 

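Ordering matters here: `_set_end_execution_time(start_time)` runs before `self.callback(self.output)`, so a callback attached to the task (the evaluator hooks in via `task.callback = self.evaluate`, visible further down) already sees a populated `_execution_time`. A stand-in sketch of that contract (`TimedJob` is illustrative, not the real `Task` API):

    import time

    class TimedJob:
        elapsed: float | None = None

        def run(self, work, callback) -> None:
            start = time.monotonic()
            work()
            self.elapsed = time.monotonic() - start  # set BEFORE the callback fires
            callback(self)

    TimedJob().run(lambda: sum(range(10**6)),
                   lambda job: print(f"took {job.elapsed:.3f}s"))
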
@@ -250,7 +260,9 @@ class Task(BaseModel):
             content = (
                 json_output
                 if json_output
-                else pydantic_output.model_dump_json() if pydantic_output else result
+                else pydantic_output.model_dump_json()
+                if pydantic_output
+                else result
             )
             self._save_file(content)
 

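The hunk above is formatting only: the nested conditional expression choosing between `json_output`, the Pydantic dump, and the raw `result` is re-wrapped one clause per line; the selected value is identical before and after.
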
@@ -1,4 +1,5 @@
 from collections import defaultdict
+from datetime import datetime
 
 from langchain_openai import ChatOpenAI
 from pydantic import BaseModel, Field

@@ -28,7 +29,9 @@ class CrewEvaluator:
     """
 
     tasks_scores: defaultdict = defaultdict(list)
+    run_execution_times: defaultdict = defaultdict(list)
     iteration: int = 0
+    execution_time_elapsed_list = []
 
     def __init__(self, crew, openai_model_name: str):
         self.crew = crew

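One caution on the new class-level state: `execution_time_elapsed_list = []` (and the `defaultdict`s, since `CrewEvaluator` defines a plain `__init__`) are mutable class attributes, shared by every instance and across repeated runs in one process. The usual pattern is per-instance initialization, sketched here (the `Example` class is illustrative only):

    from collections import defaultdict

    class Example:
        def __init__(self) -> None:
            # fresh containers per instance -- no cross-instance sharing
            self.tasks_scores: defaultdict = defaultdict(list)
            self.run_execution_times: defaultdict = defaultdict(list)
            self.execution_time_elapsed_list: list[float] = []
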
@@ -40,9 +43,6 @@ class CrewEvaluator:
         for task in self.crew.tasks:
             task.callback = self.evaluate
 
-    def set_iteration(self, iteration: int) -> None:
-        self.iteration = iteration
-
     def _evaluator_agent(self):
         return Agent(
             role="Task Execution Evaluator",

@@ -71,6 +71,21 @@ class CrewEvaluator:
             output_pydantic=TaskEvaluationPydanticOutput,
         )
 
+    def set_iteration(self, iteration: int) -> None:
+        self.iteration = iteration
+
+    def set_start_time(self) -> None:
+        """Sets the start time for the evaluation process."""
+        self._start_time = datetime.now().timestamp()
+
+    def set_end_time(self) -> None:
+        """Sets the end time for the evaluation process."""
+        self._end_time = datetime.now().timestamp()
+
+    def compute_execution_time(self) -> None:
+        """Calculates the execution time for the evaluation process."""
+        self.execution_time_elapsed_list.append(self._end_time - self._start_time)
+
     def print_crew_evaluation_result(self) -> None:
         """
         Prints the evaluation result of the crew in a table.

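The callers of `set_start_time` / `set_end_time` / `compute_execution_time` are not part of this diff; presumably the crew-testing loop drives them once per iteration. The intended call sequence, as a hedged sketch in which `evaluator`, `crew`, and the wiring around them are assumed rather than shown:

    # hypothetical driver loop; evaluator and crew objects are assumed
    n_iterations = 3
    for i in range(1, n_iterations + 1):
        evaluator.set_iteration(i)          # file scores/times under run i
        evaluator.set_start_time()
        crew.kickoff()                      # run all tasks once
        evaluator.set_end_time()
        evaluator.compute_execution_time()  # appends end - start to the elapsed list
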
@@ -91,6 +106,9 @@ class CrewEvaluator:
             sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values())
         ]
         crew_average = sum(task_averages) / len(task_averages)
+        # execution_time_avg = sum(self.execution_time_elapsed_list) / len(
+        #     self.execution_time_elapsed_list
+        # )
 
         # Create a table
         table = Table(title="Tasks Scores \n (1-10 Higher is better)")

@@ -119,6 +137,16 @@ class CrewEvaluator:
         ]
         table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
 
+        run_exec_times = [
+            int(sum(tasks_exec_times))
+            for _, tasks_exec_times in self.run_execution_times.items()
+        ]
+        execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
+        table.add_row(
+            "Execution Time (s)",
+            *map(str, run_exec_times),
+            f"{execution_time_avg}",
+        )
         # Display the table in the terminal
         console = Console()
         console.print(table)

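To make the new table row concrete: each iteration's per-task times are summed into one figure, truncated to whole seconds by `int()` (4.9 becomes 4, not 5), and the final column averages across iterations. A small worked example with made-up numbers:

    from collections import defaultdict

    run_execution_times = defaultdict(list)
    run_execution_times[1] = [1.2, 2.9]  # iteration 1: two tasks
    run_execution_times[2] = [1.0, 3.4]  # iteration 2

    run_exec_times = [
        int(sum(tasks_exec_times))
        for _, tasks_exec_times in run_execution_times.items()
    ]
    print(run_exec_times)                                  # [4, 4]
    print(int(sum(run_exec_times) / len(run_exec_times)))  # 4
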
@@ -145,5 +173,8 @@ class CrewEvaluator:
 
         if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
             self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
+            self.run_execution_times[self.iteration].append(
+                current_task._execution_time
+            )
         else:
             raise ValueError("Evaluation result is not in the expected format")