From 26d9af83670bf5fad4ba555ff65e43db2f889a03 Mon Sep 17 00:00:00 2001 From: "Brandon Hancock (bhancock_ai)" <109994880+bhancockio@users.noreply.github.com> Date: Sat, 7 Sep 2024 02:53:23 -0400 Subject: [PATCH] Brandon/cre 252 add agent to crewai test (#1308) * Update config typecheck to accept agents * Clean up prints * Adding agents to crew evaluator output table * Properly generating table now * Update tests --- src/crewai/crew.py | 5 +- src/crewai/project/annotations.py | 3 +- src/crewai/task.py | 13 ++-- src/crewai/tools/tool_usage.py | 22 +++--- src/crewai/utilities/config.py | 11 ++- .../evaluators/crew_evaluator_handler.py | 78 ++++++++++++------- .../evaluators/test_crew_evaluator_handler.py | 45 ++++++++--- 7 files changed, 115 insertions(+), 62 deletions(-) diff --git a/src/crewai/crew.py b/src/crewai/crew.py index 87f3f29f6..fff2c3a85 100644 --- a/src/crewai/crew.py +++ b/src/crewai/crew.py @@ -584,7 +584,10 @@ class Crew(BaseModel): self.manager_agent.allow_delegation = True manager = self.manager_agent if manager.tools is not None and len(manager.tools) > 0: - raise Exception("Manager agent should not have tools") + self._logger.log( + "warning", "Manager agent should not have tools", color="orange" + ) + manager.tools = [] manager.tools = self.manager_agent.get_delegation_tools(self.agents) else: manager = Agent( diff --git a/src/crewai/project/annotations.py b/src/crewai/project/annotations.py index fefbad884..d315edace 100644 --- a/src/crewai/project/annotations.py +++ b/src/crewai/project/annotations.py @@ -103,7 +103,8 @@ def crew(func): for task_name in sorted_task_names: task_instance = tasks[task_name]() instantiated_tasks.append(task_instance) - if hasattr(task_instance, "agent"): + agent_instance = getattr(task_instance, "agent", None) + if agent_instance is not None: agent_instance = task_instance.agent if agent_instance.role not in agent_roles: instantiated_agents.append(agent_instance) diff --git a/src/crewai/task.py b/src/crewai/task.py index 573e83c7d..9762391f9 100644 --- a/src/crewai/task.py +++ b/src/crewai/task.py @@ -6,7 +6,7 @@ import uuid from concurrent.futures import Future from copy import copy from hashlib import md5 -from typing import Any, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union from opentelemetry.trace import Span from pydantic import ( @@ -108,6 +108,7 @@ class Task(BaseModel): description="A converter class used to export structured output", default=None, ) + processed_by_agents: Set[str] = Field(default_factory=set) _telemetry: Telemetry = PrivateAttr(default_factory=Telemetry) _execution_span: Optional[Span] = PrivateAttr(default=None) @@ -241,6 +242,8 @@ class Task(BaseModel): self.prompt_context = context tools = tools or self.tools or [] + self.processed_by_agents.add(agent.role) + result = agent.execute_task( task=self, context=context, @@ -273,9 +276,7 @@ class Task(BaseModel): content = ( json_output if json_output - else pydantic_output.model_dump_json() - if pydantic_output - else result + else pydantic_output.model_dump_json() if pydantic_output else result ) self._save_file(content) @@ -310,8 +311,10 @@ class Task(BaseModel): """Increment the tools errors counter.""" self.tools_errors += 1 - def increment_delegations(self) -> None: + def increment_delegations(self, agent_name: Optional[str]) -> None: """Increment the delegations counter.""" + if agent_name: + self.processed_by_agents.add(agent_name) self.delegations += 1 def copy(self, agents: List["BaseAgent"]) 
-> "Task": diff --git a/src/crewai/tools/tool_usage.py b/src/crewai/tools/tool_usage.py index 7d2c46634..625c94ec3 100644 --- a/src/crewai/tools/tool_usage.py +++ b/src/crewai/tools/tool_usage.py @@ -8,6 +8,7 @@ from langchain_core.tools import BaseTool from langchain_openai import ChatOpenAI from crewai.agents.tools_handler import ToolsHandler +from crewai.task import Task from crewai.telemetry import Telemetry from crewai.tools.tool_calling import InstructorToolCalling, ToolCalling from crewai.utilities import I18N, Converter, ConverterError, Printer @@ -51,7 +52,7 @@ class ToolUsage: original_tools: List[Any], tools_description: str, tools_names: str, - task: Any, + task: Task, function_calling_llm: Any, agent: Any, action: Any, @@ -154,7 +155,10 @@ class ToolUsage: "Delegate work to coworker", "Ask question to coworker", ]: - self.task.increment_delegations() + coworker = ( + calling.arguments.get("coworker") if calling.arguments else None + ) + self.task.increment_delegations(coworker) if calling.arguments: try: @@ -241,7 +245,7 @@ class ToolUsage: result = self._remember_format(result=result) # type: ignore # "_remember_format" of "ToolUsage" does not return a value (it only ever returns None) return result - def _should_remember_format(self) -> None: + def _should_remember_format(self) -> bool: return self.task.used_tools % self._remember_format_after_usages == 0 def _remember_format(self, result: str) -> None: @@ -353,10 +357,10 @@ class ToolUsage: return ToolUsageErrorException( # type: ignore # Incompatible return value type (got "ToolUsageErrorException", expected "ToolCalling | InstructorToolCalling") f'{self._i18n.errors("tool_arguments_error")}' ) - calling = ToolCalling( # type: ignore # Unexpected keyword argument "log" for "ToolCalling" + calling = ToolCalling( tool_name=tool.name, arguments=arguments, - log=tool_string, + log=tool_string, # type: ignore ) except Exception as e: self._run_attempts += 1 @@ -404,19 +408,19 @@ class ToolUsage: '"' + value.replace('"', '\\"') + '"' ) # Re-encapsulate with double quotes elif value.isdigit(): # Check if value is a digit, hence integer - formatted_value = value + value = value elif value.lower() in [ "true", "false", "null", ]: # Check for boolean and null values - formatted_value = value.lower() + value = value.lower() else: # Assume the value is a string and needs quotes - formatted_value = '"' + value.replace('"', '\\"') + '"' + value = '"' + value.replace('"', '\\"') + '"' # Rebuild the entry with proper quoting - formatted_entry = f'"{key}": {formatted_value}' + formatted_entry = f'"{key}": {value}' formatted_entries.append(formatted_entry) # Reconstruct the JSON string diff --git a/src/crewai/utilities/config.py b/src/crewai/utilities/config.py index 56a59ce1b..156a3e66b 100644 --- a/src/crewai/utilities/config.py +++ b/src/crewai/utilities/config.py @@ -23,17 +23,16 @@ def process_config( # Copy values from config (originally from YAML) to the model's attributes. # Only copy if the attribute isn't already set, preserving any explicitly defined values. 
for key, value in config.items(): - if key not in model_class.model_fields: + if key not in model_class.model_fields or values.get(key) is not None: continue - if values.get(key) is not None: - continue - if isinstance(value, (str, int, float, bool, list)): - values[key] = value - elif isinstance(value, dict): + + if isinstance(value, dict): if isinstance(values.get(key), dict): values[key].update(value) else: values[key] = value + else: + values[key] = value # Remove the config from values to avoid duplicate processing values.pop("config", None) diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py index 7d23ff1df..3136c48e2 100644 --- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py +++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py @@ -1,14 +1,14 @@ from collections import defaultdict -from langchain_openai import ChatOpenAI -from pydantic import BaseModel, Field -from rich.console import Console -from rich.table import Table - from crewai.agent import Agent from crewai.task import Task from crewai.tasks.task_output import TaskOutput from crewai.telemetry import Telemetry +from langchain_openai import ChatOpenAI +from pydantic import BaseModel, Field +from rich.box import HEAVY_EDGE +from rich.console import Console +from rich.table import Table class TaskEvaluationPydanticOutput(BaseModel): @@ -77,50 +77,72 @@ class CrewEvaluator: def print_crew_evaluation_result(self) -> None: """ Prints the evaluation result of the crew in a table. - A Crew with 2 tasks using the command crewai test -n 2 + A Crew with 2 tasks using the command crewai test -n 3 will output the following table: - Task Scores + Tasks Scores (1-10 Higher is better) - ┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓ - ┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃ - ┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩ - │ Task 1 │ 10.0 │ 9.0 │ 9.5 │ - │ Task 2 │ 9.0 │ 9.0 │ 9.0 │ - │ Crew │ 9.5 │ 9.0 │ 9.2 │ - └────────────┴───────┴───────┴────────────┘ + ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ Tasks/Crew/Agents ┃ Run 1 ┃ Run 2 ┃ Run 3 ┃ Avg. Total ┃ Agents ┃ + ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ Task 1 │ 9.0 │ 10.0 │ 9.0 │ 9.3 │ - AI LLMs Senior Researcher │ + │ │ │ │ │ │ - AI LLMs Reporting Analyst │ + │ │ │ │ │ │ │ + │ Task 2 │ 9.0 │ 9.0 │ 9.0 │ 9.0 │ - AI LLMs Senior Researcher │ + │ │ │ │ │ │ - AI LLMs Reporting Analyst │ + │ │ │ │ │ │ │ + │ Crew │ 9.0 │ 9.5 │ 9.0 │ 9.2 │ │ + │ Execution Time (s) │ 42 │ 79 │ 52 │ 57 │ │ + └────────────────────┴───────┴───────┴───────┴────────────┴──────────────────────────────┘ """ task_averages = [ sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values()) ] crew_average = sum(task_averages) / len(task_averages) - # Create a table - table = Table(title="Tasks Scores \n (1-10 Higher is better)") + table = Table(title="Tasks Scores \n (1-10 Higher is better)", box=HEAVY_EDGE) - # Add columns for the table - table.add_column("Tasks/Crew") + table.add_column("Tasks/Crew/Agents", style="cyan") for run in range(1, len(self.tasks_scores) + 1): - table.add_column(f"Run {run}") - table.add_column("Avg. Total") + table.add_column(f"Run {run}", justify="center") + table.add_column("Avg. 
Total", justify="center") + table.add_column("Agents", style="green") - # Add rows for each task - for task_index in range(len(task_averages)): + for task_index, task in enumerate(self.crew.tasks): task_scores = [ self.tasks_scores[run][task_index] for run in range(1, len(self.tasks_scores) + 1) ] avg_score = task_averages[task_index] + agents = list(task.processed_by_agents) + + # Add the task row with the first agent table.add_row( - f"Task {task_index + 1}", *map(str, task_scores), f"{avg_score:.1f}" + f"Task {task_index + 1}", + *[f"{score:.1f}" for score in task_scores], + f"{avg_score:.1f}", + f"- {agents[0]}" if agents else "", ) - # Add a row for the crew average + # Add rows for additional agents + for agent in agents[1:]: + table.add_row("", "", "", "", "", f"- {agent}") + + # Add a blank separator row if it's not the last task + if task_index < len(self.crew.tasks) - 1: + table.add_row("", "", "", "", "", "") + + # Add Crew and Execution Time rows crew_scores = [ sum(self.tasks_scores[run]) / len(self.tasks_scores[run]) for run in range(1, len(self.tasks_scores) + 1) ] - table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}") + table.add_row( + "Crew", + *[f"{score:.2f}" for score in crew_scores], + f"{crew_average:.1f}", + "", + ) run_exec_times = [ int(sum(tasks_exec_times)) @@ -128,11 +150,9 @@ class CrewEvaluator: ] execution_time_avg = int(sum(run_exec_times) / len(run_exec_times)) table.add_row( - "Execution Time (s)", - *map(str, run_exec_times), - f"{execution_time_avg}", + "Execution Time (s)", *map(str, run_exec_times), f"{execution_time_avg}", "" ) - # Display the table in the terminal + console = Console() console.print(table) diff --git a/tests/utilities/evaluators/test_crew_evaluator_handler.py b/tests/utilities/evaluators/test_crew_evaluator_handler.py index 30fb7bf76..b45644030 100644 --- a/tests/utilities/evaluators/test_crew_evaluator_handler.py +++ b/tests/utilities/evaluators/test_crew_evaluator_handler.py @@ -1,7 +1,6 @@ from unittest import mock import pytest - from crewai.agent import Agent from crewai.crew import Crew from crewai.task import Task @@ -80,6 +79,7 @@ class TestCrewEvaluator: @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Console") @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Table") def test_print_crew_evaluation_result(self, table, console, crew_planner): + # Set up task scores and execution times crew_planner.tasks_scores = { 1: [10, 9, 8], 2: [9, 8, 7], @@ -89,22 +89,45 @@ class TestCrewEvaluator: 2: [55, 33, 67], } + # Mock agents and assign them to tasks + crew_planner.crew.agents = [ + mock.Mock(role="Agent 1"), + mock.Mock(role="Agent 2"), + ] + crew_planner.crew.tasks = [ + mock.Mock( + agent=crew_planner.crew.agents[0], processed_by_agents=["Agent 1"] + ), + mock.Mock( + agent=crew_planner.crew.agents[1], processed_by_agents=["Agent 2"] + ), + ] + + # Run the method crew_planner.print_crew_evaluation_result() + # Verify that the table is created with the appropriate structure and rows table.assert_has_calls( [ - mock.call(title="Tasks Scores \n (1-10 Higher is better)"), - mock.call().add_column("Tasks/Crew"), - mock.call().add_column("Run 1"), - mock.call().add_column("Run 2"), - mock.call().add_column("Avg. 
Total"), - mock.call().add_row("Task 1", "10", "9", "9.5"), - mock.call().add_row("Task 2", "9", "8", "8.5"), - mock.call().add_row("Task 3", "8", "7", "7.5"), - mock.call().add_row("Crew", "9.0", "8.0", "8.5"), - mock.call().add_row("Execution Time (s)", "135", "155", "145"), + mock.call( + title="Tasks Scores \n (1-10 Higher is better)", box=mock.ANY + ), # Title and styling + mock.call().add_column("Tasks/Crew/Agents", style="cyan"), # Columns + mock.call().add_column("Run 1", justify="center"), + mock.call().add_column("Run 2", justify="center"), + mock.call().add_column("Avg. Total", justify="center"), + mock.call().add_column("Agents", style="green"), + # Verify rows for tasks with agents + mock.call().add_row("Task 1", "10.0", "9.0", "9.5", "- Agent 1"), + mock.call().add_row("", "", "", "", "", ""), # Blank row between tasks + mock.call().add_row("Task 2", "9.0", "8.0", "8.5", "- Agent 2"), + # Add crew averages and execution times + mock.call().add_row("Crew", "9.00", "8.00", "8.5", ""), + mock.call().add_row("Execution Time (s)", "135", "155", "145", ""), ] ) + + # Ensure the console prints the table console.assert_has_calls([mock.call(), mock.call().print(table())]) def test_evaluate(self, crew_planner):