Files
crewAI/tests/utilities/evaluators/test_task_evaluator.py
Eduardo Chiarotti 175d5b3dd6 feat: Add Train feature for Crews (#686)
* feat: add training logic to agent and crew

* feat: add training logic to agent executor

* feat: add input parameter to cli command

* feat: add utilities for the training logic

* feat: polish code, logic and add private variables

* feat: add docstring and type hinting to executor

* feat: add constant file, add constant to code

* feat: fix name of training handler function

* feat: remove unused var

* feat: change file handler file name

* feat: Add training handler file, class and change on the code

* feat: fix name error from file

* fix: change import to adapt to logic

* feat: add training handler test

* feat: add tests for file and training_handler

* feat: add test for task evaluator function

* feat: change text to fit in-screen

* feat: add test for train function

* feat: add test for agent training_handler function

* feat: add test for agent._use_trained_data
2024-06-27 02:22:34 -03:00

65 lines
3.1 KiB
Python

from unittest import mock
from unittest.mock import MagicMock, patch
from crewai.utilities.evaluators.task_evaluator import (
TaskEvaluator,
TrainingTaskEvaluation,
)
@patch("crewai.utilities.evaluators.task_evaluator.Converter")
def test_evaluate_training_data(converter_mock):
    """Verify how ``evaluate_training_data`` drives the patched ``Converter``.

    ``Converter`` is replaced by a mock at the path the task evaluator module
    resolves it from. The test pins the keyword arguments the converter is
    constructed with (llm, prompt text, target model, instructions) and checks
    that whatever ``to_pydantic()`` yields is what the evaluator returns.
    """
    agent_id = "agent_id"

    # Two recorded feedback rounds for a single agent.
    first_round = {
        "initial_output": "Initial output 1",
        "human_feedback": "Human feedback 1",
        "improved_output": "Improved output 1",
    }
    second_round = {
        "initial_output": "Initial output 2",
        "human_feedback": "Human feedback 2",
        "improved_output": "Improved output 2",
    }
    training_data = {agent_id: {"data1": first_round, "data2": second_round}}

    agent_stub = MagicMock()

    # Canned evaluation the mocked converter will hand back.
    expected_evaluation = TrainingTaskEvaluation(
        suggestions=[
            "The initial output was already good, having a detailed explanation. "
            "However, the improved output gave similar information but in a more "
            "professional manner using better vocabulary. For future tasks, try to "
            "implement more elaborate language and precise terminology from the "
            "beginning."
        ],
        quality=8.0,
        final_summary=(
            "The agent responded well initially. However, the improved output showed "
            "that there is room for enhancement in terms of language usage, precision, "
            "and professionalism. For future tasks, the agent should focus more on "
            "these points from the start to increase performance."
        ),
    )
    converter_mock.return_value.to_pydantic.return_value = expected_evaluation

    # Exact prompt expected for the two rounds above, one fragment per section.
    expected_prompt = (
        "Assess the quality of the training data based on the llm output, "
        "human feedback , and llm output improved result.\n\n"
        "Initial Output:\nInitial output 1\n\n"
        "Human Feedback:\nHuman feedback 1\n\n"
        "Improved Output:\nImproved output 1\n\n"
        "Initial Output:\nInitial output 2\n\n"
        "Human Feedback:\nHuman feedback 2\n\n"
        "Improved Output:\nImproved output 2\n\n"
        "Please provide:\n"
        "- Based on the Human Feedbacks and the comparison between Initial Outputs "
        "and Improved outputs provide action items based on human_feedback for "
        "future tasks\n"
        "- A score from 0 to 10 evaluating on completion, quality, and overall "
        "performance from the improved output to the initial output based on the "
        "human feedback\n"
    )
    expected_instructions = (
        "I'm gonna convert this raw text into valid JSON.\n\n"
        "The json should have the following structure, with the following keys:\n"
        "- suggestions: List[str]\n"
        "- quality: float\n"
        "- final_summary: str"
    )

    evaluator = TaskEvaluator(original_agent=agent_stub)
    result = evaluator.evaluate_training_data(training_data, agent_id)

    assert result == expected_evaluation

    # The converter must be built with these kwargs, then asked for a pydantic
    # object; assert_has_calls checks this sequence appears in the mock's calls.
    expected_calls = [
        mock.call(
            llm=agent_stub.llm,
            text=expected_prompt,
            model=TrainingTaskEvaluation,
            instructions=expected_instructions,
        ),
        mock.call().to_pydantic(),
    ]
    converter_mock.assert_has_calls(expected_calls)