feat: add crew Testing/Evaluating feature (#998)

* feat: add crew Testing/evalauting feature

* feat: add docs and add unit test

* feat: improve testing output table

* feat: add tests

* feat: fix type checking issue

* feat: add raise ValueError when testing if output is not the expected

* docs: add docs for Testing

* feat: improve tests and fix some issue

* feat: back to sync

* feat: change opdeai model

* feat: fix test
This commit is contained in:
Eduardo Chiarotti
2024-07-26 14:23:51 -03:00
committed by GitHub
parent 2d086ab596
commit 2d2154ed65
7 changed files with 350 additions and 4 deletions

View File

@@ -37,6 +37,7 @@ from crewai.utilities.constants import (
TRAINED_AGENTS_DATA_FILE,
TRAINING_DATA_FILE,
)
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities.formatter import (
aggregate_raw_outputs_from_task_outputs,
@@ -967,10 +968,19 @@ class Crew(BaseModel):
return total_usage_metrics
def test(
self, n_iterations: int, model: str, inputs: Optional[Dict[str, Any]] = None
self,
n_iterations: int,
openai_model_name: str,
inputs: Optional[Dict[str, Any]] = None,
) -> None:
"""Test the crew with the given inputs."""
pass
"""Test and evaluate the Crew with the given inputs for n iterations."""
evaluator = CrewEvaluator(self, openai_model_name)
for i in range(1, n_iterations + 1):
evaluator.set_iteration(i)
self.kickoff(inputs=inputs)
evaluator.print_crew_evaluation_result()
def __repr__(self):
return f"Crew(id={self.id}, process={self.process}, number_of_agents={len(self.agents)}, number_of_tasks={len(self.tasks)})"