Merge branch 'main' into undo-agentops-api-check

This commit is contained in:
Brandon Hancock
2024-10-16 11:18:19 -04:00
358 changed files with 90406 additions and 1624332 deletions

View File

@@ -1,6 +1,5 @@
from typing import List
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from crewai.utilities import Converter
@@ -39,7 +38,7 @@ class TaskEvaluation(BaseModel):
class TrainingTaskEvaluation(BaseModel):
suggestions: List[str] = Field(
description="Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs provide action items based on human_feedback for future tasks."
description="List of clear, actionable instructions derived from the Human Feedbacks to enhance the Agent's performance. Analyze the differences between Initial Outputs and Improved Outputs to generate specific action items for future tasks. Ensure all key and specific points from the human feedback are incorporated into these instructions."
)
quality: float = Field(
description="A score from 0 to 10 evaluating on completion, quality, and overall performance from the improved output to the initial output based on the human feedback."
@@ -68,7 +67,7 @@ class TaskEvaluator:
instructions = "Convert all responses into valid JSON output."
if not self._is_gpt(self.llm):
if not self.llm.supports_function_calling():
model_schema = PydanticSchemaParser(model=TaskEvaluation).get_schema()
instructions = f"{instructions}\n\nReturn only valid JSON with the following schema:\n```json\n{model_schema}\n```"
@@ -81,9 +80,6 @@ class TaskEvaluator:
return converter.to_pydantic()
def _is_gpt(self, llm) -> bool:
return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
def evaluate_training_data(
self, training_data: dict, agent_id: str
) -> TrainingTaskEvaluation:
@@ -109,12 +105,12 @@ class TaskEvaluator:
"Assess the quality of the training data based on the llm output, human feedback , and llm output improved result.\n\n"
f"{final_aggregated_data}"
"Please provide:\n"
"- Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs provide action items based on human_feedback for future tasks\n"
"- Provide a list of clear, actionable instructions derived from the Human Feedbacks to enhance the Agent's performance. Analyze the differences between Initial Outputs and Improved Outputs to generate specific action items for future tasks. Ensure all key and specificpoints from the human feedback are incorporated into these instructions.\n"
"- A score from 0 to 10 evaluating on completion, quality, and overall performance from the improved output to the initial output based on the human feedback\n"
)
instructions = "I'm gonna convert this raw text into valid JSON."
if not self._is_gpt(self.llm):
if not self.llm.supports_function_calling():
model_schema = PydanticSchemaParser(
model=TrainingTaskEvaluation
).get_schema()