Files
crewAI/src/crewai/utilities/evaluators/task_evaluator.py
Eduardo Chiarotti 175d5b3dd6 feat: Add Train feature for Crews (#686)
* feat: add training logic to agent and crew

* feat: add training logic to agent executor

* feat: add input parameter  to cli command

* feat: add utilities for the training logic

* feat: polish code, logic and add private variables

* feat: add docstring and type hinting to executor

* feat: add constant file, add constant to code

* feat: fix name of training handler function

* feat: remove unused var

* feat: change file handler file name

* feat: Add training handler file, class and change on the code

* feat: fix name error from file

* fix: change import to adapt to logic

* feat: add training handler test

* feat: add tests for file and training_handler

* feat: add test for task evaluator function

* feat: change text to fit in-screen

* feat: add test for train function

* feat: add test for agent training_handler function

* feat: add test for agent._use_trained_data
2024-06-27 02:22:34 -03:00

120 lines
5.0 KiB
Python

from typing import List
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from crewai.utilities import Converter
from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
class Entity(BaseModel):
name: str = Field(description="The name of the entity.")
type: str = Field(description="The type of the entity.")
description: str = Field(description="Description of the entity.")
relationships: List[str] = Field(description="Relationships of the entity.")
class TaskEvaluation(BaseModel):
suggestions: List[str] = Field(
description="Suggestions to improve future similar tasks."
)
quality: float = Field(
description="A score from 0 to 10 evaluating on completion, quality, and overall performance, all taking into account the task description, expected output, and the result of the task."
)
entities: List[Entity] = Field(
description="Entities extracted from the task output."
)
class TrainingTaskEvaluation(BaseModel):
suggestions: List[str] = Field(
description="Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs provide action items based on human_feedback for future tasks."
)
quality: float = Field(
description="A score from 0 to 10 evaluating on completion, quality, and overall performance from the improved output to the initial output based on the human feedback."
)
final_summary: str = Field(
description="A step by step action items to improve the next Agent based on the human-feedback and improved output."
)
class TaskEvaluator:
def __init__(self, original_agent):
self.llm = original_agent.llm
def evaluate(self, task, ouput) -> TaskEvaluation:
evaluation_query = (
f"Assess the quality of the task completed based on the description, expected output, and actual results.\n\n"
f"Task Description:\n{task.description}\n\n"
f"Expected Output:\n{task.expected_output}\n\n"
f"Actual Output:\n{ouput}\n\n"
"Please provide:\n"
"- Bullet points suggestions to improve future similar tasks\n"
"- A score from 0 to 10 evaluating on completion, quality, and overall performance"
"- Entities extracted from the task output, if any, their type, description, and relationships"
)
instructions = "I'm gonna convert this raw text into valid JSON."
if not self._is_gpt(self.llm):
model_schema = PydanticSchemaParser(model=TaskEvaluation).get_schema()
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
converter = Converter(
llm=self.llm,
text=evaluation_query,
model=TaskEvaluation,
instructions=instructions,
)
return converter.to_pydantic()
def _is_gpt(self, llm) -> bool:
return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
def evaluate_training_data(
self, training_data: dict, agent_id: str
) -> TrainingTaskEvaluation:
"""
Evaluate the training data based on the llm output, human feedback, and improved output.
Parameters:
- training_data (dict): The training data to be evaluated.
- agent_id (str): The ID of the agent.
"""
output_training_data = training_data[agent_id]
final_aggregated_data = ""
for _, data in output_training_data.items():
final_aggregated_data += (
f"Initial Output:\n{data['initial_output']}\n\n"
f"Human Feedback:\n{data['human_feedback']}\n\n"
f"Improved Output:\n{data['improved_output']}\n\n"
)
evaluation_query = (
"Assess the quality of the training data based on the llm output, human feedback , and llm output improved result.\n\n"
f"{final_aggregated_data}"
"Please provide:\n"
"- Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs provide action items based on human_feedback for future tasks\n"
"- A score from 0 to 10 evaluating on completion, quality, and overall performance from the improved output to the initial output based on the human feedback\n"
)
instructions = "I'm gonna convert this raw text into valid JSON."
if not self._is_gpt(self.llm):
model_schema = PydanticSchemaParser(
model=TrainingTaskEvaluation
).get_schema()
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
converter = Converter(
llm=self.llm,
text=evaluation_query,
model=TrainingTaskEvaluation,
instructions=instructions,
)
pydantic_result = converter.to_pydantic()
return pydantic_result