Merge branch 'main' into undo-agentops-api-check

2026-05-01 23:32:39 +00:00 · 2024-10-16 11:18:19 -04:00
parent 161e2e20a5 a6b7295092
commit 48e1505a0a
358 changed files with 90406 additions and 1624332 deletions
--- a/src/crewai/utilities/evaluators/task_evaluator.py
+++ b/src/crewai/utilities/evaluators/task_evaluator.py
@@ -1,6 +1,5 @@
 from typing import List

-from langchain_openai import ChatOpenAI
 from pydantic import BaseModel, Field

 from crewai.utilities import Converter
@@ -39,7 +38,7 @@ class TaskEvaluation(BaseModel):

 class TrainingTaskEvaluation(BaseModel):
    suggestions: List[str] = Field(
-        description="Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs provide action items based on human_feedback for future tasks."
+        description="List of clear, actionable instructions derived from the Human Feedbacks to enhance the Agent's performance. Analyze the differences between Initial Outputs and Improved Outputs to generate specific action items for future tasks. Ensure all key and specific points from the human feedback are incorporated into these instructions."
    )
    quality: float = Field(
        description="A score from 0 to 10 evaluating on completion, quality, and overall performance from the improved output to the initial output based on the human feedback."
@@ -68,7 +67,7 @@ class TaskEvaluator:

        instructions = "Convert all responses into valid JSON output."

-        if not self._is_gpt(self.llm):
+        if not self.llm.supports_function_calling():
            model_schema = PydanticSchemaParser(model=TaskEvaluation).get_schema()
            instructions = f"{instructions}\n\nReturn only valid JSON with the following schema:\n```json\n{model_schema}\n```"

@@ -81,9 +80,6 @@ class TaskEvaluator:

        return converter.to_pydantic()

-    def _is_gpt(self, llm) -> bool:
-        return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
-
    def evaluate_training_data(
        self, training_data: dict, agent_id: str
    ) -> TrainingTaskEvaluation:
@@ -109,12 +105,12 @@ class TaskEvaluator:
            "Assess the quality of the training data based on the llm output, human feedback , and llm output improved result.\n\n"
            f"{final_aggregated_data}"
            "Please provide:\n"
-            "- Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs provide action items based on human_feedback for future tasks\n"
+            "- Provide a list of clear, actionable instructions derived from the Human Feedbacks to enhance the Agent's performance. Analyze the differences between Initial Outputs and Improved Outputs to generate specific action items for future tasks. Ensure all key and specific points from the human feedback are incorporated into these instructions.\n"
            "- A score from 0 to 10 evaluating on completion, quality, and overall performance from the improved output to the initial output based on the human feedback\n"
        )
        instructions = "I'm gonna convert this raw text into valid JSON."

-        if not self._is_gpt(self.llm):
+        if not self.llm.supports_function_calling():
            model_schema = PydanticSchemaParser(
                model=TrainingTaskEvaluation
            ).get_schema()