Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-10 00:28:31 +00:00)
feat: Add execution time to both task and testing feature (#1031)
* feat: Add execution time to both task and testing feature
* feat: Remove unused functions
* feat: change test_crew to evaluate_crew to avoid issues with testing libs
* feat: fix tests
Committed by GitHub · parent de6b597eff · commit d824db82a3
@@ -6,9 +6,9 @@ from crewai.memory.storage.kickoff_task_outputs_storage import (
 )

 from .create_crew import create_crew
+from .evaluate_crew import evaluate_crew
 from .replay_from_task import replay_task_command
 from .reset_memories_command import reset_memories_command
-from .test_crew import test_crew
 from .train_crew import train_crew


@@ -144,7 +144,7 @@ def reset_memories(long, short, entities, kickoff_outputs, all):
 def test(n_iterations: int, model: str):
     """Test the crew and evaluate the results."""
     click.echo(f"Testing the crew for {n_iterations} iterations with model {model}")
-    test_crew(n_iterations, model)
+    evaluate_crew(n_iterations, model)


 if __name__ == "__main__":
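The CLI hunk above only swaps the call from test_crew to evaluate_crew; the `test` command itself is unchanged. For readers outside the repo, here is a minimal self-contained sketch of a Click command wired up this way. The option flags, the defaults, and the stub evaluate_crew below are illustrative assumptions, not taken from this commit.

import click


@click.command()
@click.option("-n", "--n_iterations", type=int, default=3, help="Number of test iterations.")
@click.option("-m", "--model", type=str, default="gpt-4o-mini", help="Model used to evaluate the results.")
def test(n_iterations: int, model: str) -> None:
    """Test the crew and evaluate the results."""
    click.echo(f"Testing the crew for {n_iterations} iterations with model {model}")
    evaluate_crew(n_iterations, model)


def evaluate_crew(n_iterations: int, model: str) -> None:
    # Stub standing in for the real evaluate_crew shown in the next hunk.
    click.echo(f"(stub) evaluating for {n_iterations} iterations with {model}")


if __name__ == "__main__":
    test()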
@@ -1,13 +1,11 @@
 import subprocess

 import click
-import pytest
-
-pytest.skip(allow_module_level=True)


-def test_crew(n_iterations: int, model: str) -> None:
+def evaluate_crew(n_iterations: int, model: str) -> None:
     """
-    Test the crew by running a command in the Poetry environment.
+    Test and Evaluate the crew by running a command in the Poetry environment.

     Args:
         n_iterations (int): The number of iterations to test the crew.
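The renamed evaluate_crew keeps the same job: shell out to the project's test entry point inside the Poetry environment. Below is a minimal sketch consistent with the docstring above, assuming a `poetry run test <n_iterations> <model>` script exists; the exact command and the error handling are assumptions, not copied from the file.

import subprocess

import click


def evaluate_crew(n_iterations: int, model: str) -> None:
    """Test and evaluate the crew by running a command in the Poetry environment."""
    # Assumed entry point: a `test` script registered in the project's pyproject.toml.
    command = ["poetry", "run", "test", str(n_iterations), model]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        click.echo(f"An error occurred while testing the crew: {e}", err=True)


if __name__ == "__main__":
    evaluate_crew(2, "gpt-4o-mini")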
@@ -1,3 +1,4 @@
+import datetime
 import json
 import os
 import threading
@@ -107,6 +108,7 @@ class Task(BaseModel):
     _original_description: str | None = None
     _original_expected_output: str | None = None
     _thread: threading.Thread | None = None
+    _execution_time: float | None = None

     def __init__(__pydantic_self__, **data):
         config = data.pop("config", {})
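The new _execution_time attribute starts with an underscore, so on a Pydantic v2 BaseModel it behaves as a private attribute: settable at runtime but excluded from validation and serialization. A small standalone sketch of that behavior follows; TimedTask is a made-up model, not crewAI's Task.

from pydantic import BaseModel, PrivateAttr


class TimedTask(BaseModel):
    description: str
    # Underscore-prefixed names are private attributes in Pydantic v2;
    # PrivateAttr makes the default explicit.
    _execution_time: float | None = PrivateAttr(default=None)


task = TimedTask(description="demo")
task._execution_time = 1.25
print(task.model_dump())  # {'description': 'demo'} - the private attribute is not serialized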
@@ -120,6 +122,12 @@ class Task(BaseModel):
                 "may_not_set_field", "This field is not to be set by the user.", {}
             )

+    def _set_start_execution_time(self) -> float:
+        return datetime.datetime.now().timestamp()
+
+    def _set_end_execution_time(self, start_time: float) -> None:
+        self._execution_time = datetime.datetime.now().timestamp() - start_time
+
     @field_validator("output_file")
     @classmethod
     def output_file_validation(cls, value: str) -> str:
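Both helpers above use wall-clock timestamps from datetime, so the measured duration can be skewed if the system clock changes mid-run. The standalone sketch below shows the same two-method pattern on a monotonic clock, offered only as an alternative, not as what this commit does.

import time


class ExecutionTimer:
    """Same start/end pattern as above, but time.monotonic() is immune to clock adjustments."""

    _execution_time: float | None = None

    def _set_start_execution_time(self) -> float:
        return time.monotonic()

    def _set_end_execution_time(self, start_time: float) -> None:
        self._execution_time = time.monotonic() - start_time


timer = ExecutionTimer()
start = timer._set_start_execution_time()
time.sleep(0.1)  # stand-in for real work
timer._set_end_execution_time(start)
print(f"elapsed: {timer._execution_time:.3f}s")  # roughly 0.100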
@@ -216,6 +224,7 @@ class Task(BaseModel):
                 f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
             )

+        start_time = self._set_start_execution_time()
         self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)

         self.prompt_context = context
@@ -239,6 +248,7 @@ class Task(BaseModel):
         )
         self.output = task_output

+        self._set_end_execution_time(start_time)
         if self.callback:
             self.callback(self.output)

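The two task.py hunks above bracket the task's execution: the timer starts before the telemetry span and stops before the callback fires, so a callback (such as the evaluator's) can already read _execution_time. The toy class below reproduces just that ordering; MiniTask is illustrative, and the agent, telemetry, and output objects are omitted.

import datetime
import time


class MiniTask:
    def __init__(self, callback=None):
        self.callback = callback
        self.output = None
        self._execution_time = None

    def _set_start_execution_time(self) -> float:
        return datetime.datetime.now().timestamp()

    def _set_end_execution_time(self, start_time: float) -> None:
        self._execution_time = datetime.datetime.now().timestamp() - start_time

    def execute(self) -> str:
        start_time = self._set_start_execution_time()
        time.sleep(0.05)                            # stands in for the agent doing the work
        self.output = "task result"
        self._set_end_execution_time(start_time)    # stop the clock before the callback runs
        if self.callback:
            self.callback(self.output)              # the callback can now read _execution_time
        return self.output


task = MiniTask(callback=lambda out: print(f"callback saw: {out}"))
task.execute()
print(f"measured execution time: {task._execution_time:.2f}s")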
@@ -250,7 +260,9 @@ class Task(BaseModel):
         content = (
             json_output
             if json_output
-            else pydantic_output.model_dump_json() if pydantic_output else result
+            else pydantic_output.model_dump_json()
+            if pydantic_output
+            else result
         )
         self._save_file(content)

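The hunk above is formatting only: the chained conditional expression is reflowed across three lines, and the selected value does not change. For readers unsure how the chained if/else expression binds, here is a standalone check of its behavior; pick_content and the sample values are made up.

def pick_content(json_output, pydantic_json, result):
    # Same selection logic as the reformatted expression above.
    return (
        json_output
        if json_output
        else pydantic_json
        if pydantic_json
        else result
    )


assert pick_content('{"a": 1}', None, "raw") == '{"a": 1}'   # JSON output wins
assert pick_content(None, '{"b": 2}', "raw") == '{"b": 2}'   # then the pydantic dump
assert pick_content(None, None, "raw") == "raw"              # otherwise the raw result
print("all three branches select the expected value")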
@@ -28,6 +28,7 @@ class CrewEvaluator:
     """

     tasks_scores: defaultdict = defaultdict(list)
+    run_execution_times: defaultdict = defaultdict(list)
     iteration: int = 0

     def __init__(self, crew, openai_model_name: str):
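run_execution_times mirrors tasks_scores: both are defaultdict(list), keyed by iteration number, with one entry appended per evaluated task. A small standalone illustration of how that accumulation looks after two runs of a two-task crew; the numbers are invented.

from collections import defaultdict

tasks_scores: defaultdict = defaultdict(list)
run_execution_times: defaultdict = defaultdict(list)

for iteration in (1, 2):                              # two test runs
    for quality, seconds in ((9, 12.3), (8, 20.1)):   # two tasks per run
        tasks_scores[iteration].append(quality)
        run_execution_times[iteration].append(seconds)

print(dict(tasks_scores))          # {1: [9, 8], 2: [9, 8]}
print(dict(run_execution_times))   # {1: [12.3, 20.1], 2: [12.3, 20.1]}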
@@ -40,9 +41,6 @@ class CrewEvaluator:
         for task in self.crew.tasks:
             task.callback = self.evaluate

-    def set_iteration(self, iteration: int) -> None:
-        self.iteration = iteration
-
     def _evaluator_agent(self):
         return Agent(
             role="Task Execution Evaluator",
@@ -71,6 +69,9 @@
             output_pydantic=TaskEvaluationPydanticOutput,
         )

+    def set_iteration(self, iteration: int) -> None:
+        self.iteration = iteration
+
     def print_crew_evaluation_result(self) -> None:
         """
         Prints the evaluation result of the crew in a table.
@@ -119,6 +120,16 @@ class CrewEvaluator:
         ]
         table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")

+        run_exec_times = [
+            int(sum(tasks_exec_times))
+            for _, tasks_exec_times in self.run_execution_times.items()
+        ]
+        execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
+        table.add_row(
+            "Execution Time (s)",
+            *map(str, run_exec_times),
+            f"{execution_time_avg}",
+        )
         # Display the table in the terminal
         console = Console()
         console.print(table)
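The new table rows reduce those per-task times to one total per run plus an overall average, truncated to whole seconds via int(). The same arithmetic in isolation, with invented times for two runs of a two-task crew:

from collections import defaultdict

run_execution_times = defaultdict(list, {1: [12.3, 20.1], 2: [11.7, 22.4]})

run_exec_times = [
    int(sum(tasks_exec_times))
    for _, tasks_exec_times in run_execution_times.items()
]
execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))

print(run_exec_times)       # [32, 34]  -> one "Execution Time (s)" cell per run
print(execution_time_avg)   # 33        -> the average column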
@@ -145,5 +156,8 @@ class CrewEvaluator:

         if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
             self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
+            self.run_execution_times[self.iteration].append(
+                current_task._execution_time
+            )
         else:
             raise ValueError("Evaluation result is not in the expected format")
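Finally, the evaluate callback records the measured time next to the quality score, behind the same isinstance guard already used for scores. Below is a toy reproduction of just that recording step; FakeEvaluation and FakeTask stand in for TaskEvaluationPydanticOutput and Task, and the values are invented.

from collections import defaultdict
from dataclasses import dataclass


@dataclass
class FakeEvaluation:
    quality: float            # stands in for TaskEvaluationPydanticOutput.quality


@dataclass
class FakeTask:
    _execution_time: float    # set by the Task timing helpers earlier in the diff


tasks_scores: defaultdict = defaultdict(list)
run_execution_times: defaultdict = defaultdict(list)
iteration = 1

evaluation = FakeEvaluation(quality=9.5)
current_task = FakeTask(_execution_time=17.8)

if isinstance(evaluation, FakeEvaluation):
    tasks_scores[iteration].append(evaluation.quality)
    run_execution_times[iteration].append(current_task._execution_time)
else:
    raise ValueError("Evaluation result is not in the expected format")

print(dict(tasks_scores), dict(run_execution_times))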