Supporting eval single Agent/LiteAgent (#3167)

* refactor: rely on task completion event to evaluate agents
* feat: remove Crew dependency to evaluate agent
* feat: drop execution_context in AgentEvaluator
* chore: drop experimental Agent Eval feature from stable crew.test
* feat: support eval LiteAgent
* resolve linter issues
2026-01-10 08:38:30 +00:00 · 2025-07-15 10:22:41 -03:00
parent 53f674be60
commit 6ebb6c9b63
16 changed files with 1313 additions and 148 deletions
--- a/src/crewai/experimental/evaluation/metrics/goal_metrics.py
+++ b/src/crewai/experimental/evaluation/metrics/goal_metrics.py
@@ -14,10 +14,14 @@ class GoalAlignmentEvaluator(BaseEvaluator):
    def evaluate(
        self,
        agent: Agent,
-        task: Task,
        execution_trace: Dict[str, Any],
        final_output: Any,
+        task: Task | None = None,
    ) -> EvaluationScore:
+        task_context = ""
+        if task is not None:
+            task_context = f"Task description: {task.description}\nExpected output: {task.expected_output}\n"
+
        prompt = [
            {"role": "system", "content": """You are an expert evaluator assessing how well an AI agent's output aligns with its assigned task goal.

@@ -37,8 +41,7 @@ Return your evaluation as JSON with fields 'score' (number) and 'feedback' (stri
            {"role": "user", "content": f"""
 Agent role: {agent.role}
 Agent goal: {agent.goal}
-Task description: {task.description}
-Expected output: {task.expected_output}
+{task_context}

 Agent's final output:
 {final_output}