feat: add crew Testing/Evaluating feature (#998)

* feat: add crew Testing/evaluating feature

* feat: add docs and unit tests

* feat: improve testing output table

* feat: add tests

* feat: fix type checking issue

* feat: raise ValueError during testing when the output is not the expected one

* docs: add docs for Testing

* feat: improve tests and fix some issues

* feat: back to sync

* feat: change openai model

* feat: fix test
Eduardo Chiarotti
2024-07-26 14:23:51 -03:00
committed by GitHub
parent 2d086ab596
commit 2d2154ed65
7 changed files with 350 additions and 4 deletions


@@ -8,6 +8,7 @@ from unittest.mock import MagicMock, patch
import pydantic_core
import pytest
from crewai.agent import Agent
from crewai.agents.cache import CacheHandler
from crewai.crew import Crew
@@ -2499,3 +2500,34 @@ def test_conditional_should_execute():
    assert condition_mock.call_count == 1
    assert condition_mock() is True
    assert mock_execute_sync.call_count == 2


@mock.patch("crewai.crew.CrewEvaluator")
@mock.patch("crewai.crew.Crew.kickoff")
def test_crew_testing_function(mock_kickoff, crew_evaluator):
    task = Task(
        description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
        expected_output="5 bullet points with a paragraph for each idea.",
        agent=researcher,
    )
    crew = Crew(
        agents=[researcher],
        tasks=[task],
    )
    n_iterations = 2
    crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})

    assert len(mock_kickoff.mock_calls) == n_iterations
    mock_kickoff.assert_has_calls(
        [mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
    )

    crew_evaluator.assert_has_calls(
        [
            mock.call(crew, "gpt-4o-mini"),
            mock.call().set_iteration(1),
            mock.call().set_iteration(2),
            mock.call().print_crew_evaluation_result(),
        ]
    )
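
The call order asserted above implies the shape of the new Crew.test flow: build a CrewEvaluator with the crew and the OpenAI model name, mark each iteration with set_iteration before running kickoff, and print the aggregated evaluation once all iterations finish. The stub classes below are a minimal, self-contained sketch of that flow for illustration only; apart from test, kickoff, CrewEvaluator, set_iteration, and print_crew_evaluation_result (all exercised by the test), every detail is an assumption and not the actual crewAI implementation.

from typing import Any, Dict, Optional


class CrewEvaluator:
    # Hypothetical stand-in for crewAI's CrewEvaluator; only the surface used above.
    def __init__(self, crew: "Crew", openai_model_name: str) -> None:
        self.crew = crew
        self.openai_model_name = openai_model_name
        self.iteration = 0

    def set_iteration(self, iteration: int) -> None:
        # Record which test iteration the next kickoff belongs to.
        self.iteration = iteration

    def print_crew_evaluation_result(self) -> None:
        # The real feature prints a scoring table; this stub only reports completion.
        print(f"Evaluated crew over {self.iteration} iteration(s) with {self.openai_model_name}.")


class Crew:
    # Hypothetical stand-in for crewai.crew.Crew.
    def kickoff(self, inputs: Optional[Dict[str, Any]] = None) -> str:
        return f"ran with inputs={inputs}"

    def test(
        self,
        n_iterations: int,
        openai_model_name: str,
        inputs: Optional[Dict[str, Any]] = None,
    ) -> None:
        # Matches the mocked call order: the evaluator is built once, set_iteration
        # precedes each kickoff, and the result is printed after the last run.
        evaluator = CrewEvaluator(self, openai_model_name)
        for i in range(1, n_iterations + 1):
            evaluator.set_iteration(i)
            self.kickoff(inputs=inputs)
        evaluator.print_crew_evaluation_result()


Crew().test(2, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})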