From 26d9af83670bf5fad4ba555ff65e43db2f889a03 Mon Sep 17 00:00:00 2001 From: "Brandon Hancock (bhancock_ai)" <109994880+bhancockio@users.noreply.github.com> Date: Sat, 7 Sep 2024 02:53:23 -0400 Subject: [PATCH] Brandon/cre 252 add agent to crewai test (#1308) * Update config typecheck to accept agents * Clean up prints * Adding agents to crew evaluator output table * Properly generating table now * Update tests --- src/crewai/crew.py | 5 +- src/crewai/project/annotations.py | 3 +- src/crewai/task.py | 13 ++-- src/crewai/tools/tool_usage.py | 22 +++--- src/crewai/utilities/config.py | 11 ++- .../evaluators/crew_evaluator_handler.py | 78 ++++++++++++------- .../evaluators/test_crew_evaluator_handler.py | 45 ++++++++--- 7 files changed, 115 insertions(+), 62 deletions(-) diff --git a/src/crewai/crew.py b/src/crewai/crew.py index 87f3f29f6..fff2c3a85 100644 --- a/src/crewai/crew.py +++ b/src/crewai/crew.py @@ -584,7 +584,10 @@ class Crew(BaseModel): self.manager_agent.allow_delegation = True manager = self.manager_agent if manager.tools is not None and len(manager.tools) > 0: - raise Exception("Manager agent should not have tools") + self._logger.log( + "warning", "Manager agent should not have tools", color="orange" + ) + manager.tools = [] manager.tools = self.manager_agent.get_delegation_tools(self.agents) else: manager = Agent( diff --git a/src/crewai/project/annotations.py b/src/crewai/project/annotations.py index fefbad884..d315edace 100644 --- a/src/crewai/project/annotations.py +++ b/src/crewai/project/annotations.py @@ -103,7 +103,8 @@ def crew(func): for task_name in sorted_task_names: task_instance = tasks[task_name]() instantiated_tasks.append(task_instance) - if hasattr(task_instance, "agent"): + agent_instance = getattr(task_instance, "agent", None) + if agent_instance is not None: agent_instance = task_instance.agent if agent_instance.role not in agent_roles: instantiated_agents.append(agent_instance) diff --git a/src/crewai/task.py b/src/crewai/task.py index 573e83c7d..9762391f9 100644 --- a/src/crewai/task.py +++ b/src/crewai/task.py @@ -6,7 +6,7 @@ import uuid from concurrent.futures import Future from copy import copy from hashlib import md5 -from typing import Any, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union from opentelemetry.trace import Span from pydantic import ( @@ -108,6 +108,7 @@ class Task(BaseModel): description="A converter class used to export structured output", default=None, ) + processed_by_agents: Set[str] = Field(default_factory=set) _telemetry: Telemetry = PrivateAttr(default_factory=Telemetry) _execution_span: Optional[Span] = PrivateAttr(default=None) @@ -241,6 +242,8 @@ class Task(BaseModel): self.prompt_context = context tools = tools or self.tools or [] + self.processed_by_agents.add(agent.role) + result = agent.execute_task( task=self, context=context, @@ -273,9 +276,7 @@ class Task(BaseModel): content = ( json_output if json_output - else pydantic_output.model_dump_json() - if pydantic_output - else result + else pydantic_output.model_dump_json() if pydantic_output else result ) self._save_file(content) @@ -310,8 +311,10 @@ class Task(BaseModel): """Increment the tools errors counter.""" self.tools_errors += 1 - def increment_delegations(self) -> None: + def increment_delegations(self, agent_name: Optional[str]) -> None: """Increment the delegations counter.""" + if agent_name: + self.processed_by_agents.add(agent_name) self.delegations += 1 def copy(self, agents: List["BaseAgent"]) 
-> "Task": diff --git a/src/crewai/tools/tool_usage.py b/src/crewai/tools/tool_usage.py index 7d2c46634..625c94ec3 100644 --- a/src/crewai/tools/tool_usage.py +++ b/src/crewai/tools/tool_usage.py @@ -8,6 +8,7 @@ from langchain_core.tools import BaseTool from langchain_openai import ChatOpenAI from crewai.agents.tools_handler import ToolsHandler +from crewai.task import Task from crewai.telemetry import Telemetry from crewai.tools.tool_calling import InstructorToolCalling, ToolCalling from crewai.utilities import I18N, Converter, ConverterError, Printer @@ -51,7 +52,7 @@ class ToolUsage: original_tools: List[Any], tools_description: str, tools_names: str, - task: Any, + task: Task, function_calling_llm: Any, agent: Any, action: Any, @@ -154,7 +155,10 @@ class ToolUsage: "Delegate work to coworker", "Ask question to coworker", ]: - self.task.increment_delegations() + coworker = ( + calling.arguments.get("coworker") if calling.arguments else None + ) + self.task.increment_delegations(coworker) if calling.arguments: try: @@ -241,7 +245,7 @@ class ToolUsage: result = self._remember_format(result=result) # type: ignore # "_remember_format" of "ToolUsage" does not return a value (it only ever returns None) return result - def _should_remember_format(self) -> None: + def _should_remember_format(self) -> bool: return self.task.used_tools % self._remember_format_after_usages == 0 def _remember_format(self, result: str) -> None: @@ -353,10 +357,10 @@ class ToolUsage: return ToolUsageErrorException( # type: ignore # Incompatible return value type (got "ToolUsageErrorException", expected "ToolCalling | InstructorToolCalling") f'{self._i18n.errors("tool_arguments_error")}' ) - calling = ToolCalling( # type: ignore # Unexpected keyword argument "log" for "ToolCalling" + calling = ToolCalling( tool_name=tool.name, arguments=arguments, - log=tool_string, + log=tool_string, # type: ignore ) except Exception as e: self._run_attempts += 1 @@ -404,19 +408,19 @@ class ToolUsage: '"' + value.replace('"', '\\"') + '"' ) # Re-encapsulate with double quotes elif value.isdigit(): # Check if value is a digit, hence integer - formatted_value = value + value = value elif value.lower() in [ "true", "false", "null", ]: # Check for boolean and null values - formatted_value = value.lower() + value = value.lower() else: # Assume the value is a string and needs quotes - formatted_value = '"' + value.replace('"', '\\"') + '"' + value = '"' + value.replace('"', '\\"') + '"' # Rebuild the entry with proper quoting - formatted_entry = f'"{key}": {formatted_value}' + formatted_entry = f'"{key}": {value}' formatted_entries.append(formatted_entry) # Reconstruct the JSON string diff --git a/src/crewai/utilities/config.py b/src/crewai/utilities/config.py index 56a59ce1b..156a3e66b 100644 --- a/src/crewai/utilities/config.py +++ b/src/crewai/utilities/config.py @@ -23,17 +23,16 @@ def process_config( # Copy values from config (originally from YAML) to the model's attributes. # Only copy if the attribute isn't already set, preserving any explicitly defined values. 
for key, value in config.items(): - if key not in model_class.model_fields: + if key not in model_class.model_fields or values.get(key) is not None: continue - if values.get(key) is not None: - continue - if isinstance(value, (str, int, float, bool, list)): - values[key] = value - elif isinstance(value, dict): + + if isinstance(value, dict): if isinstance(values.get(key), dict): values[key].update(value) else: values[key] = value + else: + values[key] = value # Remove the config from values to avoid duplicate processing values.pop("config", None) diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py index 7d23ff1df..3136c48e2 100644 --- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py +++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py @@ -1,14 +1,14 @@ from collections import defaultdict -from langchain_openai import ChatOpenAI -from pydantic import BaseModel, Field -from rich.console import Console -from rich.table import Table - from crewai.agent import Agent from crewai.task import Task from crewai.tasks.task_output import TaskOutput from crewai.telemetry import Telemetry +from langchain_openai import ChatOpenAI +from pydantic import BaseModel, Field +from rich.box import HEAVY_EDGE +from rich.console import Console +from rich.table import Table class TaskEvaluationPydanticOutput(BaseModel): @@ -77,50 +77,72 @@ class CrewEvaluator: def print_crew_evaluation_result(self) -> None: """ Prints the evaluation result of the crew in a table. - A Crew with 2 tasks using the command crewai test -n 2 + A Crew with 2 tasks using the command crewai test -n 3 will output the following table: - Task Scores + Tasks Scores (1-10 Higher is better) - ┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓ - ┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃ - ┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩ - │ Task 1 │ 10.0 │ 9.0 │ 9.5 │ - │ Task 2 │ 9.0 │ 9.0 │ 9.0 │ - │ Crew │ 9.5 │ 9.0 │ 9.2 │ - └────────────┴───────┴───────┴────────────┘ + ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ Tasks/Crew/Agents ┃ Run 1 ┃ Run 2 ┃ Run 3 ┃ Avg. Total ┃ Agents ┃ + ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ Task 1 │ 9.0 │ 10.0 │ 9.0 │ 9.3 │ - AI LLMs Senior Researcher │ + │ │ │ │ │ │ - AI LLMs Reporting Analyst │ + │ │ │ │ │ │ │ + │ Task 2 │ 9.0 │ 9.0 │ 9.0 │ 9.0 │ - AI LLMs Senior Researcher │ + │ │ │ │ │ │ - AI LLMs Reporting Analyst │ + │ │ │ │ │ │ │ + │ Crew │ 9.0 │ 9.5 │ 9.0 │ 9.2 │ │ + │ Execution Time (s) │ 42 │ 79 │ 52 │ 57 │ │ + └────────────────────┴───────┴───────┴───────┴────────────┴──────────────────────────────┘ """ task_averages = [ sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values()) ] crew_average = sum(task_averages) / len(task_averages) - # Create a table - table = Table(title="Tasks Scores \n (1-10 Higher is better)") + table = Table(title="Tasks Scores \n (1-10 Higher is better)", box=HEAVY_EDGE) - # Add columns for the table - table.add_column("Tasks/Crew") + table.add_column("Tasks/Crew/Agents", style="cyan") for run in range(1, len(self.tasks_scores) + 1): - table.add_column(f"Run {run}") - table.add_column("Avg. Total") + table.add_column(f"Run {run}", justify="center") + table.add_column("Avg. 
Total", justify="center") + table.add_column("Agents", style="green") - # Add rows for each task - for task_index in range(len(task_averages)): + for task_index, task in enumerate(self.crew.tasks): task_scores = [ self.tasks_scores[run][task_index] for run in range(1, len(self.tasks_scores) + 1) ] avg_score = task_averages[task_index] + agents = list(task.processed_by_agents) + + # Add the task row with the first agent table.add_row( - f"Task {task_index + 1}", *map(str, task_scores), f"{avg_score:.1f}" + f"Task {task_index + 1}", + *[f"{score:.1f}" for score in task_scores], + f"{avg_score:.1f}", + f"- {agents[0]}" if agents else "", ) - # Add a row for the crew average + # Add rows for additional agents + for agent in agents[1:]: + table.add_row("", "", "", "", "", f"- {agent}") + + # Add a blank separator row if it's not the last task + if task_index < len(self.crew.tasks) - 1: + table.add_row("", "", "", "", "", "") + + # Add Crew and Execution Time rows crew_scores = [ sum(self.tasks_scores[run]) / len(self.tasks_scores[run]) for run in range(1, len(self.tasks_scores) + 1) ] - table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}") + table.add_row( + "Crew", + *[f"{score:.2f}" for score in crew_scores], + f"{crew_average:.1f}", + "", + ) run_exec_times = [ int(sum(tasks_exec_times)) @@ -128,11 +150,9 @@ class CrewEvaluator: ] execution_time_avg = int(sum(run_exec_times) / len(run_exec_times)) table.add_row( - "Execution Time (s)", - *map(str, run_exec_times), - f"{execution_time_avg}", + "Execution Time (s)", *map(str, run_exec_times), f"{execution_time_avg}", "" ) - # Display the table in the terminal + console = Console() console.print(table) diff --git a/tests/utilities/evaluators/test_crew_evaluator_handler.py b/tests/utilities/evaluators/test_crew_evaluator_handler.py index 30fb7bf76..b45644030 100644 --- a/tests/utilities/evaluators/test_crew_evaluator_handler.py +++ b/tests/utilities/evaluators/test_crew_evaluator_handler.py @@ -1,7 +1,6 @@ from unittest import mock import pytest - from crewai.agent import Agent from crewai.crew import Crew from crewai.task import Task @@ -80,6 +79,7 @@ class TestCrewEvaluator: @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Console") @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Table") def test_print_crew_evaluation_result(self, table, console, crew_planner): + # Set up task scores and execution times crew_planner.tasks_scores = { 1: [10, 9, 8], 2: [9, 8, 7], @@ -89,22 +89,45 @@ class TestCrewEvaluator: 2: [55, 33, 67], } + # Mock agents and assign them to tasks + crew_planner.crew.agents = [ + mock.Mock(role="Agent 1"), + mock.Mock(role="Agent 2"), + ] + crew_planner.crew.tasks = [ + mock.Mock( + agent=crew_planner.crew.agents[0], processed_by_agents=["Agent 1"] + ), + mock.Mock( + agent=crew_planner.crew.agents[1], processed_by_agents=["Agent 2"] + ), + ] + + # Run the method crew_planner.print_crew_evaluation_result() + # Verify that the table is created with the appropriate structure and rows table.assert_has_calls( [ - mock.call(title="Tasks Scores \n (1-10 Higher is better)"), - mock.call().add_column("Tasks/Crew"), - mock.call().add_column("Run 1"), - mock.call().add_column("Run 2"), - mock.call().add_column("Avg. 
Total"), - mock.call().add_row("Task 1", "10", "9", "9.5"), - mock.call().add_row("Task 2", "9", "8", "8.5"), - mock.call().add_row("Task 3", "8", "7", "7.5"), - mock.call().add_row("Crew", "9.0", "8.0", "8.5"), - mock.call().add_row("Execution Time (s)", "135", "155", "145"), + mock.call( + title="Tasks Scores \n (1-10 Higher is better)", box=mock.ANY + ), # Title and styling + mock.call().add_column("Tasks/Crew/Agents", style="cyan"), # Columns + mock.call().add_column("Run 1", justify="center"), + mock.call().add_column("Run 2", justify="center"), + mock.call().add_column("Avg. Total", justify="center"), + mock.call().add_column("Agents", style="green"), + # Verify rows for tasks with agents + mock.call().add_row("Task 1", "10.0", "9.0", "9.5", "- Agent 1"), + mock.call().add_row("", "", "", "", "", ""), # Blank row between tasks + mock.call().add_row("Task 2", "9.0", "8.0", "8.5", "- Agent 2"), + # Add crew averages and execution times + mock.call().add_row("Crew", "9.00", "8.00", "8.5", ""), + mock.call().add_row("Execution Time (s)", "135", "155", "145", ""), ] ) + + # Ensure the console prints the table console.assert_has_calls([mock.call(), mock.call().print(table())]) def test_evaluate(self, crew_planner):