Brandon/cre 252 add agent to crewai test (#1308)

* Update config typecheck to accept agents
* Clean up prints
* Adding agents to crew evaluator output table
* Properly generating table now
* Update tests
2026-01-10 08:38:30 +00:00 · 2024-09-07 02:53:23 -04:00
parent cdaf2d41c7
commit 26d9af8367
7 changed files with 115 additions and 62 deletions
--- a/src/crewai/utilities/config.py
+++ b/src/crewai/utilities/config.py
@@ -23,17 +23,16 @@ def process_config(
    # Copy values from config (originally from YAML) to the model's attributes.
    # Only copy if the attribute isn't already set, preserving any explicitly defined values.
    for key, value in config.items():
-        if key not in model_class.model_fields:
+        if key not in model_class.model_fields or values.get(key) is not None:
            continue
-        if values.get(key) is not None:
-            continue
-        if isinstance(value, (str, int, float, bool, list)):
-            values[key] = value
-        elif isinstance(value, dict):
+
+        if isinstance(value, dict):
            if isinstance(values.get(key), dict):
                values[key].update(value)
            else:
                values[key] = value
+        else:
+            values[key] = value

    # Remove the config from values to avoid duplicate processing
    values.pop("config", None)
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -1,14 +1,14 @@
 from collections import defaultdict

-from langchain_openai import ChatOpenAI
-from pydantic import BaseModel, Field
-from rich.console import Console
-from rich.table import Table
-
 from crewai.agent import Agent
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+from rich.box import HEAVY_EDGE
+from rich.console import Console
+from rich.table import Table


 class TaskEvaluationPydanticOutput(BaseModel):
@@ -77,50 +77,72 @@ class CrewEvaluator:
    def print_crew_evaluation_result(self) -> None:
        """
        Prints the evaluation result of the crew in a table.
-        A Crew with 2 tasks using the command crewai test -n 2
+        A Crew with 2 tasks using the command crewai test -n 3
        will output the following table:

-                        Task Scores
+                        Tasks Scores
                    (1-10 Higher is better)
-            ┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
-            ┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃
-            ┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
-            │ Task 1     │ 10.0  │ 9.0   │ 9.5        │
-            │ Task 2     │ 9.0   │ 9.0   │ 9.0        │
-            │ Crew       │ 9.5   │ 9.0   │ 9.2        │
-            └────────────┴───────┴───────┴────────────┘
+        ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+        ┃ Tasks/Crew/Agents  ┃ Run 1 ┃ Run 2 ┃ Run 3 ┃ Avg. Total ┃ Agents                       ┃
+        ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+        │ Task 1             │ 9.0   │ 10.0  │ 9.0   │ 9.3        │ - AI LLMs Senior Researcher  │
+        │                    │       │       │       │            │ - AI LLMs Reporting Analyst  │
+        │                    │       │       │       │            │                              │
+        │ Task 2             │ 9.0   │ 9.0   │ 9.0   │ 9.0        │ - AI LLMs Senior Researcher  │
+        │                    │       │       │       │            │ - AI LLMs Reporting Analyst  │
+        │                    │       │       │       │            │                              │
+        │ Crew               │ 9.0   │ 9.5   │ 9.0   │ 9.2        │                              │
+        │ Execution Time (s) │ 42    │ 79    │ 52    │ 57         │                              │
+        └────────────────────┴───────┴───────┴───────┴────────────┴──────────────────────────────┘
        """
        task_averages = [
            sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values())
        ]
        crew_average = sum(task_averages) / len(task_averages)

-        # Create a table
-        table = Table(title="Tasks Scores \n (1-10 Higher is better)")
+        table = Table(title="Tasks Scores \n (1-10 Higher is better)", box=HEAVY_EDGE)

-        # Add columns for the table
-        table.add_column("Tasks/Crew")
+        table.add_column("Tasks/Crew/Agents", style="cyan")
        for run in range(1, len(self.tasks_scores) + 1):
-            table.add_column(f"Run {run}")
-        table.add_column("Avg. Total")
+            table.add_column(f"Run {run}", justify="center")
+        table.add_column("Avg. Total", justify="center")
+        table.add_column("Agents", style="green")

-        # Add rows for each task
-        for task_index in range(len(task_averages)):
+        for task_index, task in enumerate(self.crew.tasks):
            task_scores = [
                self.tasks_scores[run][task_index]
                for run in range(1, len(self.tasks_scores) + 1)
            ]
            avg_score = task_averages[task_index]
+            agents = list(task.processed_by_agents)
+
+            # Add the task row with the first agent
            table.add_row(
-                f"Task {task_index + 1}", *map(str, task_scores), f"{avg_score:.1f}"
+                f"Task {task_index + 1}",
+                *[f"{score:.1f}" for score in task_scores],
+                f"{avg_score:.1f}",
+                f"- {agents[0]}" if agents else "",
            )

-        # Add a row for the crew average
+            # Add rows for additional agents
+            for agent in agents[1:]:
+                table.add_row("", "", "", "", "", f"- {agent}")
+
+            # Add a blank separator row if it's not the last task
+            if task_index < len(self.crew.tasks) - 1:
+                table.add_row("", "", "", "", "", "")
+
+        # Add Crew and Execution Time rows
        crew_scores = [
            sum(self.tasks_scores[run]) / len(self.tasks_scores[run])
            for run in range(1, len(self.tasks_scores) + 1)
        ]
-        table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
+        table.add_row(
+            "Crew",
+            *[f"{score:.2f}" for score in crew_scores],
+            f"{crew_average:.1f}",
+            "",
+        )

        run_exec_times = [
            int(sum(tasks_exec_times))
@@ -128,11 +150,9 @@ class CrewEvaluator:
        ]
        execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
        table.add_row(
-            "Execution Time (s)",
-            *map(str, run_exec_times),
-            f"{execution_time_avg}",
+            "Execution Time (s)", *map(str, run_exec_times), f"{execution_time_avg}", ""
        )
-        # Display the table in the terminal
+
        console = Console()
        console.print(table)