Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-09 08:08:32 +00:00
Brandon/cre 252 add agent to crewai test (#1308)
* Update config typecheck to accept agents
* Clean up prints
* Adding agents to crew evaluator output table
* Properly generating table now
* Update tests
Committed by: GitHub
Parent: cdaf2d41c7
Commit: 26d9af8367
@@ -584,7 +584,10 @@ class Crew(BaseModel):
             self.manager_agent.allow_delegation = True
             manager = self.manager_agent
             if manager.tools is not None and len(manager.tools) > 0:
-                raise Exception("Manager agent should not have tools")
+                self._logger.log(
+                    "warning", "Manager agent should not have tools", color="orange"
+                )
+                manager.tools = []
             manager.tools = self.manager_agent.get_delegation_tools(self.agents)
         else:
             manager = Agent(
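In short, a hierarchical crew whose manager agent arrives with tools no longer fails hard: the tools are discarded with a warning and replaced by delegation tools built from the crew's agents. A minimal behavioral sketch, not library code; the helper name and its parameters are invented for illustration:

    # Sketch of the new branch, assuming objects with the attributes used in the diff.
    def prepare_manager(manager, crew_agents, logger):
        manager.allow_delegation = True
        if manager.tools is not None and len(manager.tools) > 0:
            # Previously this raised Exception("Manager agent should not have tools").
            logger.log("warning", "Manager agent should not have tools", color="orange")
            manager.tools = []
        manager.tools = manager.get_delegation_tools(crew_agents)
        return manager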
@@ -103,7 +103,8 @@ def crew(func):
         for task_name in sorted_task_names:
             task_instance = tasks[task_name]()
             instantiated_tasks.append(task_instance)
-            if hasattr(task_instance, "agent"):
+            agent_instance = getattr(task_instance, "agent", None)
+            if agent_instance is not None:
                 agent_instance = task_instance.agent
                 if agent_instance.role not in agent_roles:
                     instantiated_agents.append(agent_instance)
@@ -6,7 +6,7 @@ import uuid
 from concurrent.futures import Future
 from copy import copy
 from hashlib import md5
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union
 
 from opentelemetry.trace import Span
 from pydantic import (
@@ -108,6 +108,7 @@ class Task(BaseModel):
         description="A converter class used to export structured output",
         default=None,
     )
+    processed_by_agents: Set[str] = Field(default_factory=set)
 
     _telemetry: Telemetry = PrivateAttr(default_factory=Telemetry)
     _execution_span: Optional[Span] = PrivateAttr(default=None)
@@ -241,6 +242,8 @@ class Task(BaseModel):
         self.prompt_context = context
         tools = tools or self.tools or []
 
+        self.processed_by_agents.add(agent.role)
+
         result = agent.execute_task(
             task=self,
             context=context,
@@ -273,9 +276,7 @@ class Task(BaseModel):
             content = (
                 json_output
                 if json_output
-                else pydantic_output.model_dump_json()
-                if pydantic_output
-                else result
+                else pydantic_output.model_dump_json() if pydantic_output else result
             )
             self._save_file(content)
 
@@ -310,8 +311,10 @@ class Task(BaseModel):
         """Increment the tools errors counter."""
         self.tools_errors += 1
 
-    def increment_delegations(self) -> None:
+    def increment_delegations(self, agent_name: Optional[str]) -> None:
         """Increment the delegations counter."""
+        if agent_name:
+            self.processed_by_agents.add(agent_name)
         self.delegations += 1
 
     def copy(self, agents: List["BaseAgent"]) -> "Task":
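Taken together, the Task changes mean a task now records every agent role that worked on it: the executing agent is added during execution, and any coworker that receives a delegation is recorded through the new increment_delegations signature. A minimal sketch of that bookkeeping, using a stand-in class rather than crewai.Task:

    from typing import Optional, Set

    class TaskBookkeepingSketch:
        """Stand-in for the two Task fields touched by this commit."""

        def __init__(self) -> None:
            self.processed_by_agents: Set[str] = set()
            self.delegations: int = 0

        def increment_delegations(self, agent_name: Optional[str]) -> None:
            # Record the coworker that received the delegation, when known.
            if agent_name:
                self.processed_by_agents.add(agent_name)
            self.delegations += 1

    task = TaskBookkeepingSketch()
    task.processed_by_agents.add("AI LLMs Senior Researcher")  # executing agent
    task.increment_delegations("AI LLMs Reporting Analyst")    # delegated coworker
    task.increment_delegations(None)                           # coworker unknown: count only
    assert task.delegations == 2
    assert task.processed_by_agents == {"AI LLMs Senior Researcher", "AI LLMs Reporting Analyst"}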
@@ -8,6 +8,7 @@ from langchain_core.tools import BaseTool
 from langchain_openai import ChatOpenAI
 
 from crewai.agents.tools_handler import ToolsHandler
+from crewai.task import Task
 from crewai.telemetry import Telemetry
 from crewai.tools.tool_calling import InstructorToolCalling, ToolCalling
 from crewai.utilities import I18N, Converter, ConverterError, Printer
@@ -51,7 +52,7 @@ class ToolUsage:
         original_tools: List[Any],
         tools_description: str,
         tools_names: str,
-        task: Any,
+        task: Task,
         function_calling_llm: Any,
         agent: Any,
         action: Any,
@@ -154,7 +155,10 @@ class ToolUsage:
             "Delegate work to coworker",
             "Ask question to coworker",
         ]:
-            self.task.increment_delegations()
+            coworker = (
+                calling.arguments.get("coworker") if calling.arguments else None
+            )
+            self.task.increment_delegations(coworker)
 
         if calling.arguments:
             try:
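For context, the tools matched above are the delegation tools, whose call arguments may or may not carry a coworker name; the exact argument shape below is an assumption, but the guarded lookup behaves like this:

    # calling.arguments may be None or a dict; only a present "coworker" key is recorded.
    calling_arguments = {"task": "Summarize findings", "coworker": "AI LLMs Reporting Analyst"}
    coworker = calling_arguments.get("coworker") if calling_arguments else None
    assert coworker == "AI LLMs Reporting Analyst"

    calling_arguments = None
    coworker = calling_arguments.get("coworker") if calling_arguments else None
    assert coworker is None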
@@ -241,7 +245,7 @@ class ToolUsage:
             result = self._remember_format(result=result)  # type: ignore # "_remember_format" of "ToolUsage" does not return a value (it only ever returns None)
         return result
 
-    def _should_remember_format(self) -> None:
+    def _should_remember_format(self) -> bool:
         return self.task.used_tools % self._remember_format_after_usages == 0
 
     def _remember_format(self, result: str) -> None:
@@ -353,10 +357,10 @@ class ToolUsage:
                 return ToolUsageErrorException(  # type: ignore # Incompatible return value type (got "ToolUsageErrorException", expected "ToolCalling | InstructorToolCalling")
                     f'{self._i18n.errors("tool_arguments_error")}'
                 )
-            calling = ToolCalling(  # type: ignore # Unexpected keyword argument "log" for "ToolCalling"
+            calling = ToolCalling(
                 tool_name=tool.name,
                 arguments=arguments,
-                log=tool_string,
+                log=tool_string,  # type: ignore
             )
         except Exception as e:
             self._run_attempts += 1
@@ -404,19 +408,19 @@ class ToolUsage:
                         '"' + value.replace('"', '\\"') + '"'
                     )  # Re-encapsulate with double quotes
                 elif value.isdigit():  # Check if value is a digit, hence integer
-                    formatted_value = value
+                    value = value
                 elif value.lower() in [
                     "true",
                     "false",
                     "null",
                 ]:  # Check for boolean and null values
-                    formatted_value = value.lower()
+                    value = value.lower()
                 else:
                     # Assume the value is a string and needs quotes
-                    formatted_value = '"' + value.replace('"', '\\"') + '"'
+                    value = '"' + value.replace('"', '\\"') + '"'
 
                 # Rebuild the entry with proper quoting
-                formatted_entry = f'"{key}": {formatted_value}'
+                formatted_entry = f'"{key}": {value}'
                 formatted_entries.append(formatted_entry)
 
             # Reconstruct the JSON string
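The renamed variable does not change the repair rules this block applies to each value of a malformed tool-input string: digits pass through, JSON literals are lowercased, and everything else is re-quoted. A standalone sketch of just those rules (the surrounding key/entry parsing is simplified away):

    def quote_value(value: str) -> str:
        value = value.strip()
        if value.isdigit():  # integers pass through unquoted
            return value
        if value.lower() in ["true", "false", "null"]:  # JSON literals, lowercased
            return value.lower()
        # anything else is treated as a string and re-encapsulated in double quotes
        return '"' + value.replace('"', '\\"') + '"'

    assert quote_value("42") == "42"
    assert quote_value("True") == "true"
    assert quote_value("New York") == '"New York"'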
@@ -23,17 +23,16 @@ def process_config(
     # Copy values from config (originally from YAML) to the model's attributes.
     # Only copy if the attribute isn't already set, preserving any explicitly defined values.
     for key, value in config.items():
-        if key not in model_class.model_fields:
+        if key not in model_class.model_fields or values.get(key) is not None:
             continue
-        if values.get(key) is not None:
-            continue
-        if isinstance(value, (str, int, float, bool, list)):
-            values[key] = value
-        elif isinstance(value, dict):
+
+        if isinstance(value, dict):
             if isinstance(values.get(key), dict):
                 values[key].update(value)
             else:
                 values[key] = value
+        else:
+            values[key] = value
 
     # Remove the config from values to avoid duplicate processing
     values.pop("config", None)
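The practical effect, and the "Update config typecheck to accept agents" bullet in the commit message, is that the final else branch now copies a config value of any type, including an agent object, where the old isinstance check only accepted primitives, lists, and dicts. A rough, self-contained approximation of the updated logic; the model_fields set and field names below are invented for the example:

    def merge_config(config: dict, values: dict, model_fields: set) -> dict:
        for key, value in config.items():
            # Skip unknown fields and anything the caller already set explicitly.
            if key not in model_fields or values.get(key) is not None:
                continue
            if isinstance(value, dict):
                if isinstance(values.get(key), dict):
                    values[key].update(value)
                else:
                    values[key] = value
            else:
                values[key] = value  # now also covers arbitrary objects such as agents
        values.pop("config", None)
        return values

    class AgentStub:
        """Stands in for a crewai Agent; any non-primitive object works here."""

    values = {"role": "Researcher"}
    config = {"role": "Ignored", "goal": "Find sources", "agent": AgentStub()}
    merged = merge_config(config, values, model_fields={"role", "goal", "agent"})
    assert merged["role"] == "Researcher"    # explicit value wins over the config
    assert merged["goal"] == "Find sources"  # copied from the config
    assert "agent" in merged                 # previously dropped by the isinstance check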
@@ -1,14 +1,14 @@
 from collections import defaultdict
 
-from langchain_openai import ChatOpenAI
-from pydantic import BaseModel, Field
-from rich.console import Console
-from rich.table import Table
-
 from crewai.agent import Agent
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+from rich.box import HEAVY_EDGE
+from rich.console import Console
+from rich.table import Table
 
 
 class TaskEvaluationPydanticOutput(BaseModel):
@@ -77,50 +77,72 @@ class CrewEvaluator:
     def print_crew_evaluation_result(self) -> None:
         """
         Prints the evaluation result of the crew in a table.
-        A Crew with 2 tasks using the command crewai test -n 2
+        A Crew with 2 tasks using the command crewai test -n 3
         will output the following table:
 
-                     Task Scores
+                    Tasks Scores
                 (1-10 Higher is better)
-        ┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
-        ┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃
-        ┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
-        │ Task 1     │ 10.0  │ 9.0   │ 9.5        │
-        │ Task 2     │ 9.0   │ 9.0   │ 9.0        │
-        │ Crew       │ 9.5   │ 9.0   │ 9.2        │
-        └────────────┴───────┴───────┴────────────┘
+        ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+        ┃ Tasks/Crew/Agents  ┃ Run 1 ┃ Run 2 ┃ Run 3 ┃ Avg. Total ┃ Agents                      ┃
+        ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+        │ Task 1             │ 9.0   │ 10.0  │ 9.0   │ 9.3        │ - AI LLMs Senior Researcher │
+        │                    │       │       │       │            │ - AI LLMs Reporting Analyst │
+        │                    │       │       │       │            │                             │
+        │ Task 2             │ 9.0   │ 9.0   │ 9.0   │ 9.0        │ - AI LLMs Senior Researcher │
+        │                    │       │       │       │            │ - AI LLMs Reporting Analyst │
+        │                    │       │       │       │            │                             │
+        │ Crew               │ 9.0   │ 9.5   │ 9.0   │ 9.2        │                             │
+        │ Execution Time (s) │ 42    │ 79    │ 52    │ 57         │                             │
+        └────────────────────┴───────┴───────┴───────┴────────────┴─────────────────────────────┘
         """
         task_averages = [
             sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values())
         ]
         crew_average = sum(task_averages) / len(task_averages)
 
         # Create a table
-        table = Table(title="Tasks Scores \n (1-10 Higher is better)")
+        table = Table(title="Tasks Scores \n (1-10 Higher is better)", box=HEAVY_EDGE)
 
         # Add columns for the table
-        table.add_column("Tasks/Crew")
+        table.add_column("Tasks/Crew/Agents", style="cyan")
         for run in range(1, len(self.tasks_scores) + 1):
-            table.add_column(f"Run {run}")
-        table.add_column("Avg. Total")
+            table.add_column(f"Run {run}", justify="center")
+        table.add_column("Avg. Total", justify="center")
+        table.add_column("Agents", style="green")
 
         # Add rows for each task
-        for task_index in range(len(task_averages)):
+        for task_index, task in enumerate(self.crew.tasks):
             task_scores = [
                 self.tasks_scores[run][task_index]
                 for run in range(1, len(self.tasks_scores) + 1)
             ]
             avg_score = task_averages[task_index]
+            agents = list(task.processed_by_agents)
+
+            # Add the task row with the first agent
             table.add_row(
-                f"Task {task_index + 1}", *map(str, task_scores), f"{avg_score:.1f}"
+                f"Task {task_index + 1}",
+                *[f"{score:.1f}" for score in task_scores],
+                f"{avg_score:.1f}",
+                f"- {agents[0]}" if agents else "",
             )
 
-        # Add a row for the crew average
+            # Add rows for additional agents
+            for agent in agents[1:]:
+                table.add_row("", "", "", "", "", f"- {agent}")
+
+            # Add a blank separator row if it's not the last task
+            if task_index < len(self.crew.tasks) - 1:
+                table.add_row("", "", "", "", "", "")
+
+        # Add Crew and Execution Time rows
         crew_scores = [
             sum(self.tasks_scores[run]) / len(self.tasks_scores[run])
             for run in range(1, len(self.tasks_scores) + 1)
         ]
-        table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
+        table.add_row(
+            "Crew",
+            *[f"{score:.2f}" for score in crew_scores],
+            f"{crew_average:.1f}",
+            "",
+        )
 
         run_exec_times = [
             int(sum(tasks_exec_times))
@@ -128,11 +150,9 @@ class CrewEvaluator:
         ]
         execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
         table.add_row(
-            "Execution Time (s)",
-            *map(str, run_exec_times),
-            f"{execution_time_avg}",
+            "Execution Time (s)", *map(str, run_exec_times), f"{execution_time_avg}", ""
         )
-        # Display the table in the terminal
+
         console = Console()
         console.print(table)
 
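The averaging itself is unchanged by this commit; only the cell formatting and the Agents column are new. Reproducing the numbers from the docstring example (2 tasks, 3 runs) shows where each cell comes from; the dict below mirrors the run -> per-task shape of self.tasks_scores:

    tasks_scores = {1: [9.0, 9.0], 2: [10.0, 9.0], 3: [9.0, 9.0]}  # run -> [task 1, task 2]

    task_averages = [sum(s) / len(s) for s in zip(*tasks_scores.values())]
    crew_scores = [sum(tasks_scores[r]) / len(tasks_scores[r]) for r in sorted(tasks_scores)]
    crew_average = sum(task_averages) / len(task_averages)

    assert [f"{a:.1f}" for a in task_averages] == ["9.3", "9.0"]          # Avg. Total column
    assert [f"{s:.2f}" for s in crew_scores] == ["9.00", "9.50", "9.00"]  # per-run crew scores
    assert f"{crew_average:.1f}" == "9.2"                                 # Crew row average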
@@ -1,7 +1,6 @@
 from unittest import mock
 
 import pytest
-
 from crewai.agent import Agent
 from crewai.crew import Crew
 from crewai.task import Task
@@ -80,6 +79,7 @@ class TestCrewEvaluator:
     @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Console")
     @mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Table")
     def test_print_crew_evaluation_result(self, table, console, crew_planner):
+        # Set up task scores and execution times
         crew_planner.tasks_scores = {
             1: [10, 9, 8],
             2: [9, 8, 7],
@@ -89,22 +89,45 @@ class TestCrewEvaluator:
             2: [55, 33, 67],
         }
 
+        # Mock agents and assign them to tasks
+        crew_planner.crew.agents = [
+            mock.Mock(role="Agent 1"),
+            mock.Mock(role="Agent 2"),
+        ]
+        crew_planner.crew.tasks = [
+            mock.Mock(
+                agent=crew_planner.crew.agents[0], processed_by_agents=["Agent 1"]
+            ),
+            mock.Mock(
+                agent=crew_planner.crew.agents[1], processed_by_agents=["Agent 2"]
+            ),
+        ]
+
+        # Run the method
         crew_planner.print_crew_evaluation_result()
 
+        # Verify that the table is created with the appropriate structure and rows
         table.assert_has_calls(
             [
-                mock.call(title="Tasks Scores \n (1-10 Higher is better)"),
-                mock.call().add_column("Tasks/Crew"),
-                mock.call().add_column("Run 1"),
-                mock.call().add_column("Run 2"),
-                mock.call().add_column("Avg. Total"),
-                mock.call().add_row("Task 1", "10", "9", "9.5"),
-                mock.call().add_row("Task 2", "9", "8", "8.5"),
-                mock.call().add_row("Task 3", "8", "7", "7.5"),
-                mock.call().add_row("Crew", "9.0", "8.0", "8.5"),
-                mock.call().add_row("Execution Time (s)", "135", "155", "145"),
+                mock.call(
+                    title="Tasks Scores \n (1-10 Higher is better)", box=mock.ANY
+                ),  # Title and styling
+                mock.call().add_column("Tasks/Crew/Agents", style="cyan"),  # Columns
+                mock.call().add_column("Run 1", justify="center"),
+                mock.call().add_column("Run 2", justify="center"),
+                mock.call().add_column("Avg. Total", justify="center"),
+                mock.call().add_column("Agents", style="green"),
+                # Verify rows for tasks with agents
+                mock.call().add_row("Task 1", "10.0", "9.0", "9.5", "- Agent 1"),
+                mock.call().add_row("", "", "", "", "", ""),  # Blank row between tasks
+                mock.call().add_row("Task 2", "9.0", "8.0", "8.5", "- Agent 2"),
+                # Add crew averages and execution times
+                mock.call().add_row("Crew", "9.00", "8.00", "8.5", ""),
+                mock.call().add_row("Execution Time (s)", "135", "155", "145", ""),
             ]
         )
 
+        # Ensure the console prints the table
         console.assert_has_calls([mock.call(), mock.call().print(table())])
 
     def test_evaluate(self, crew_planner):
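The expected cell strings in these assertions follow directly from the tasks_scores fixture; the run 1 execution times sit outside the displayed hunk, so only their sum (135) and the 145 average appear here. A quick sanity check of the visible numbers:

    tasks_scores = {1: [10, 9, 8], 2: [9, 8, 7]}

    task_averages = [sum(s) / len(s) for s in zip(*tasks_scores.values())]
    crew_scores = [sum(tasks_scores[r]) / len(tasks_scores[r]) for r in (1, 2)]
    crew_average = sum(task_averages) / len(task_averages)

    # Per-task averages; the Task 1 and Task 2 rows asserted above use the first two.
    assert [f"{a:.1f}" for a in task_averages] == ["9.5", "8.5", "7.5"]
    assert [f"{s:.2f}" for s in crew_scores] == ["9.00", "8.00"]  # Crew row, runs 1 and 2
    assert f"{crew_average:.1f}" == "8.5"
    assert 55 + 33 + 67 == 155                                    # run 2 execution time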