mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-21 05:48:14 +00:00
Merge branch 'main' into feature/procedure_v2
This commit is contained in:
@@ -397,7 +397,7 @@ def test_agent_moved_on_after_max_iterations():
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="The final answer is 42. But don't give it yet, instead keep using the `get_final_answer` tool over and over until you're told you can give yout final answer.",
|
||||
description="The final answer is 42. But don't give it yet, instead keep using the `get_final_answer` tool over and over until you're told you can give your final answer.",
|
||||
expected_output="The final answer",
|
||||
)
|
||||
output = agent.execute_task(
|
||||
@@ -948,7 +948,7 @@ def test_agent_use_trained_data(crew_training_handler):
|
||||
crew_training_handler().load.return_value = {
|
||||
agent.role: {
|
||||
"suggestions": [
|
||||
"The result of the math operatio must be right.",
|
||||
"The result of the math operation must be right.",
|
||||
"Result must be better than 1.",
|
||||
]
|
||||
}
|
||||
@@ -958,7 +958,7 @@ def test_agent_use_trained_data(crew_training_handler):
|
||||
|
||||
assert (
|
||||
result == "What is 1 + 1?You MUST follow these feedbacks: \n "
|
||||
"The result of the math operatio must be right.\n - Result must be better than 1."
|
||||
"The result of the math operation must be right.\n - Result must be better than 1."
|
||||
)
|
||||
crew_training_handler.assert_has_calls(
|
||||
[mock.call(), mock.call("trained_agents_data.pkl"), mock.call().load()]
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@ from unittest import mock
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
|
||||
from crewai.cli.cli import train, version, reset_memories
|
||||
from crewai.cli.cli import reset_memories, test, train, version
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -133,3 +133,33 @@ def test_version_command_with_tools(runner):
|
||||
"crewai tools version:" in result.output
|
||||
or "crewai tools not installed" in result.output
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.cli.test_crew")
|
||||
def test_test_default_iterations(test_crew, runner):
|
||||
result = runner.invoke(test)
|
||||
|
||||
test_crew.assert_called_once_with(3, "gpt-4o-mini")
|
||||
assert result.exit_code == 0
|
||||
assert "Testing the crew for 3 iterations with model gpt-4o-mini" in result.output
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.cli.test_crew")
|
||||
def test_test_custom_iterations(test_crew, runner):
|
||||
result = runner.invoke(test, ["--n_iterations", "5", "--model", "gpt-4o"])
|
||||
|
||||
test_crew.assert_called_once_with(5, "gpt-4o")
|
||||
assert result.exit_code == 0
|
||||
assert "Testing the crew for 5 iterations with model gpt-4o" in result.output
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.cli.test_crew")
|
||||
def test_test_invalid_string_iterations(test_crew, runner):
|
||||
result = runner.invoke(test, ["--n_iterations", "invalid"])
|
||||
|
||||
test_crew.assert_not_called()
|
||||
assert result.exit_code == 2
|
||||
assert (
|
||||
"Usage: test [OPTIONS]\nTry 'test --help' for help.\n\nError: Invalid value for '-n' / '--n_iterations': 'invalid' is not a valid integer.\n"
|
||||
in result.output
|
||||
)
|
||||
|
||||
97
tests/cli/test_crew_test.py
Normal file
97
tests/cli/test_crew_test.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import subprocess
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.cli import test_crew
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"n_iterations,model",
|
||||
[
|
||||
(1, "gpt-4o"),
|
||||
(5, "gpt-3.5-turbo"),
|
||||
(10, "gpt-4"),
|
||||
],
|
||||
)
|
||||
@mock.patch("crewai.cli.test_crew.subprocess.run")
|
||||
def test_crew_success(mock_subprocess_run, n_iterations, model):
|
||||
"""Test the crew function for successful execution."""
|
||||
mock_subprocess_run.return_value = subprocess.CompletedProcess(
|
||||
args=f"poetry run test {n_iterations} {model}", returncode=0
|
||||
)
|
||||
result = test_crew.test_crew(n_iterations, model)
|
||||
|
||||
mock_subprocess_run.assert_called_once_with(
|
||||
["poetry", "run", "test", str(n_iterations), model],
|
||||
capture_output=False,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
assert result is None
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
def test_test_crew_zero_iterations(click):
|
||||
test_crew.test_crew(0, "gpt-4o")
|
||||
click.echo.assert_called_once_with(
|
||||
"An unexpected error occurred: The number of iterations must be a positive integer.",
|
||||
err=True,
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
def test_test_crew_negative_iterations(click):
|
||||
test_crew.test_crew(-2, "gpt-4o")
|
||||
click.echo.assert_called_once_with(
|
||||
"An unexpected error occurred: The number of iterations must be a positive integer.",
|
||||
err=True,
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
@mock.patch("crewai.cli.test_crew.subprocess.run")
|
||||
def test_test_crew_called_process_error(mock_subprocess_run, click):
|
||||
n_iterations = 5
|
||||
mock_subprocess_run.side_effect = subprocess.CalledProcessError(
|
||||
returncode=1,
|
||||
cmd=["poetry", "run", "test", str(n_iterations), "gpt-4o"],
|
||||
output="Error",
|
||||
stderr="Some error occurred",
|
||||
)
|
||||
test_crew.test_crew(n_iterations, "gpt-4o")
|
||||
|
||||
mock_subprocess_run.assert_called_once_with(
|
||||
["poetry", "run", "test", "5", "gpt-4o"],
|
||||
capture_output=False,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
click.echo.assert_has_calls(
|
||||
[
|
||||
mock.call.echo(
|
||||
"An error occurred while testing the crew: Command '['poetry', 'run', 'test', '5', 'gpt-4o']' returned non-zero exit status 1.",
|
||||
err=True,
|
||||
),
|
||||
mock.call.echo("Error", err=True),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
@mock.patch("crewai.cli.test_crew.subprocess.run")
|
||||
def test_test_crew_unexpected_exception(mock_subprocess_run, click):
|
||||
# Arrange
|
||||
n_iterations = 5
|
||||
mock_subprocess_run.side_effect = Exception("Unexpected error")
|
||||
test_crew.test_crew(n_iterations, "gpt-4o")
|
||||
|
||||
mock_subprocess_run.assert_called_once_with(
|
||||
["poetry", "run", "test", "5", "gpt-4o"],
|
||||
capture_output=False,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
click.echo.assert_called_once_with(
|
||||
"An unexpected error occurred: Unexpected error", err=True
|
||||
)
|
||||
@@ -8,6 +8,7 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import pydantic_core
|
||||
import pytest
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.agents.cache import CacheHandler
|
||||
from crewai.crew import Crew
|
||||
@@ -68,7 +69,7 @@ def test_crew_config_conditional_requirement():
|
||||
"agent": "Senior Researcher",
|
||||
},
|
||||
{
|
||||
"description": "Write a 1 amazing paragraph highlight for each idead that showcases how good an article about this topic could be, check references if necessary or search for more content but make sure it's unique, interesting and well written. Return the list of ideas with their paragraph and your notes.",
|
||||
"description": "Write a 1 amazing paragraph highlight for each idea that showcases how good an article about this topic could be, check references if necessary or search for more content but make sure it's unique, interesting and well written. Return the list of ideas with their paragraph and your notes.",
|
||||
"expected_output": "A 4 paragraph article about AI.",
|
||||
"agent": "Senior Writer",
|
||||
},
|
||||
@@ -571,6 +572,47 @@ def test_api_calls_throttling(capsys):
|
||||
moveon.assert_called()
|
||||
|
||||
|
||||
# This test is not consistent, some issue is happening on the CI when it comes to Prompt tokens
|
||||
# {'usage_metrics': {'completion_tokens': 34, 'prompt_tokens': 0, 'successful_requests': 2, 'total_tokens': 34}} CI OUTPUT
|
||||
# {'usage_metrics': {'completion_tokens': 34, 'prompt_tokens': 314, 'successful_requests': 2, 'total_tokens': 348}}
|
||||
# The issue might be related to the calculate_usage_metrics function
|
||||
# @pytest.mark.vcr(filter_headers=["authorization"])
|
||||
# def test_crew_full_output():
|
||||
# agent = Agent(
|
||||
# role="test role",
|
||||
# goal="test goal",
|
||||
# backstory="test backstory",
|
||||
# allow_delegation=False,
|
||||
# verbose=True,
|
||||
# )
|
||||
|
||||
# task1 = Task(
|
||||
# description="just say hi!",
|
||||
# expected_output="your greeting",
|
||||
# agent=agent,
|
||||
# )
|
||||
# task2 = Task(
|
||||
# description="just say hello!",
|
||||
# expected_output="your greeting",
|
||||
# agent=agent,
|
||||
# )
|
||||
|
||||
# crew = Crew(agents=[agent], tasks=[task1, task2], full_output=True)
|
||||
|
||||
# result = crew.kickoff()
|
||||
|
||||
# assert result == {
|
||||
# "final_output": "Hello!",
|
||||
# "tasks_outputs": [task1.output, task2.output],
|
||||
# "usage_metrics": {
|
||||
# "total_tokens": 348,
|
||||
# "prompt_tokens": 314,
|
||||
# "completion_tokens": 34,
|
||||
# "successful_requests": 2,
|
||||
# },
|
||||
# }
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_crew_kickoff_usage_metrics():
|
||||
inputs = [
|
||||
@@ -632,18 +674,21 @@ def test_sequential_async_task_execution_completion():
|
||||
list_ideas = Task(
|
||||
description="Give me a list of 5 interesting ideas to explore for an article, what makes them unique and interesting.",
|
||||
expected_output="Bullet point list of 5 important events.",
|
||||
max_retry_limit=3,
|
||||
agent=researcher,
|
||||
async_execution=True,
|
||||
)
|
||||
list_important_history = Task(
|
||||
description="Research the history of AI and give me the 5 most important events that shaped the technology.",
|
||||
expected_output="Bullet point list of 5 important events.",
|
||||
max_retry_limit=3,
|
||||
agent=researcher,
|
||||
async_execution=True,
|
||||
)
|
||||
write_article = Task(
|
||||
description="Write an article about the history of AI and its most important events.",
|
||||
expected_output="A 4 paragraph article about AI.",
|
||||
max_retry_limit=3,
|
||||
agent=writer,
|
||||
context=[list_ideas, list_important_history],
|
||||
)
|
||||
@@ -656,7 +701,7 @@ def test_sequential_async_task_execution_completion():
|
||||
|
||||
sequential_result = sequential_crew.kickoff()
|
||||
assert sequential_result.raw.startswith(
|
||||
"**The Evolution of Artificial Intelligence: A Journey Through Milestones**"
|
||||
"The history of artificial intelligence (AI) is marked by several pivotal events that have shaped its evolution and impact on various sectors."
|
||||
)
|
||||
|
||||
|
||||
@@ -1188,7 +1233,7 @@ def test_task_with_no_arguments():
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="Look at the available data nd give me a sense on the total number of sales.",
|
||||
description="Look at the available data and give me a sense on the total number of sales.",
|
||||
expected_output="The total number of sales as an integer",
|
||||
agent=researcher,
|
||||
)
|
||||
@@ -1235,7 +1280,7 @@ def test_delegation_is_not_enabled_if_there_are_only_one_agent():
|
||||
)
|
||||
|
||||
task = Task(
|
||||
description="Look at the available data nd give me a sense on the total number of sales.",
|
||||
description="Look at the available data and give me a sense on the total number of sales.",
|
||||
expected_output="The total number of sales as an integer",
|
||||
agent=researcher,
|
||||
)
|
||||
@@ -1311,14 +1356,14 @@ def test_agent_usage_metrics_are_captured_for_hierarchical_process():
|
||||
)
|
||||
|
||||
result = crew.kickoff()
|
||||
assert result.raw == '"Howdy!"'
|
||||
assert result.raw == "Howdy!"
|
||||
|
||||
print(crew.usage_metrics)
|
||||
|
||||
assert crew.usage_metrics == {
|
||||
"total_tokens": 311,
|
||||
"prompt_tokens": 224,
|
||||
"completion_tokens": 87,
|
||||
"total_tokens": 219,
|
||||
"prompt_tokens": 201,
|
||||
"completion_tokens": 18,
|
||||
"successful_requests": 1,
|
||||
}
|
||||
|
||||
@@ -1355,28 +1400,66 @@ def test_hierarchical_crew_creation_tasks_with_agents():
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_hierarchical_crew_creation_tasks_with_async_execution():
|
||||
"""
|
||||
Agents are not required for tasks in a hierarchical process but sometimes they are still added
|
||||
This test makes sure that the manager still delegates the task to the agent even if the agent is passed in the task
|
||||
"""
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
task = Task(
|
||||
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
|
||||
expected_output="5 bullet points with a paragraph for each idea.",
|
||||
async_execution=True, # should throw an error
|
||||
description="Write one amazing paragraph about AI.",
|
||||
expected_output="A single paragraph with 4 sentences.",
|
||||
agent=writer,
|
||||
async_execution=True,
|
||||
)
|
||||
|
||||
with pytest.raises(pydantic_core._pydantic_core.ValidationError) as exec_info:
|
||||
Crew(
|
||||
tasks=[task],
|
||||
agents=[researcher],
|
||||
process=Process.hierarchical,
|
||||
manager_llm=ChatOpenAI(model="gpt-4o"),
|
||||
)
|
||||
|
||||
assert (
|
||||
exec_info.value.errors()[0]["type"] == "async_execution_in_hierarchical_process"
|
||||
crew = Crew(
|
||||
tasks=[task],
|
||||
agents=[writer, researcher, ceo],
|
||||
process=Process.hierarchical,
|
||||
manager_llm=ChatOpenAI(model="gpt-4o"),
|
||||
)
|
||||
assert (
|
||||
"Hierarchical process error: Tasks cannot be flagged with async_execution."
|
||||
in exec_info.value.errors()[0]["msg"]
|
||||
|
||||
crew.kickoff()
|
||||
assert crew.manager_agent is not None
|
||||
assert crew.manager_agent.tools is not None
|
||||
assert crew.manager_agent.tools[0].description.startswith(
|
||||
"Delegate a specific task to one of the following coworkers: Senior Writer\n"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_hierarchical_crew_creation_tasks_with_sync_last():
|
||||
"""
|
||||
Agents are not required for tasks in a hierarchical process but sometimes they are still added
|
||||
This test makes sure that the manager still delegates the task to the agent even if the agent is passed in the task
|
||||
"""
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
task = Task(
|
||||
description="Write one amazing paragraph about AI.",
|
||||
expected_output="A single paragraph with 4 sentences.",
|
||||
agent=writer,
|
||||
async_execution=True,
|
||||
)
|
||||
task2 = Task(
|
||||
description="Write one amazing paragraph about AI.",
|
||||
expected_output="A single paragraph with 4 sentences.",
|
||||
async_execution=False,
|
||||
)
|
||||
|
||||
crew = Crew(
|
||||
tasks=[task, task2],
|
||||
agents=[writer, researcher, ceo],
|
||||
process=Process.hierarchical,
|
||||
manager_llm=ChatOpenAI(model="gpt-4o"),
|
||||
)
|
||||
|
||||
crew.kickoff()
|
||||
assert crew.manager_agent is not None
|
||||
assert crew.manager_agent.tools is not None
|
||||
assert crew.manager_agent.tools[0].description.startswith(
|
||||
"Delegate a specific task to one of the following coworkers: Senior Writer, Researcher, CEO\n"
|
||||
)
|
||||
|
||||
|
||||
@@ -1560,16 +1643,16 @@ def test_tools_with_custom_caching():
|
||||
|
||||
writer1 = Agent(
|
||||
role="Writer",
|
||||
goal="You write lesssons of math for kids.",
|
||||
backstory="You're an expert in writting and you love to teach kids but you know nothing of math.",
|
||||
goal="You write lessons of math for kids.",
|
||||
backstory="You're an expert in writing and you love to teach kids but you know nothing of math.",
|
||||
tools=[multiplcation_tool],
|
||||
allow_delegation=False,
|
||||
)
|
||||
|
||||
writer2 = Agent(
|
||||
role="Writer",
|
||||
goal="You write lesssons of math for kids.",
|
||||
backstory="You're an expert in writting and you love to teach kids but you know nothing of math.",
|
||||
goal="You write lessons of math for kids.",
|
||||
backstory="You're an expert in writing and you love to teach kids but you know nothing of math.",
|
||||
tools=[multiplcation_tool],
|
||||
allow_delegation=False,
|
||||
)
|
||||
@@ -2499,3 +2582,34 @@ def test_conditional_should_execute():
|
||||
assert condition_mock.call_count == 1
|
||||
assert condition_mock() is True
|
||||
assert mock_execute_sync.call_count == 2
|
||||
|
||||
|
||||
@mock.patch("crewai.crew.CrewEvaluator")
|
||||
@mock.patch("crewai.crew.Crew.kickoff")
|
||||
def test_crew_testing_function(mock_kickoff, crew_evaluator):
|
||||
task = Task(
|
||||
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
|
||||
expected_output="5 bullet points with a paragraph for each idea.",
|
||||
agent=researcher,
|
||||
)
|
||||
|
||||
crew = Crew(
|
||||
agents=[researcher],
|
||||
tasks=[task],
|
||||
)
|
||||
n_iterations = 2
|
||||
crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
|
||||
|
||||
assert len(mock_kickoff.mock_calls) == n_iterations
|
||||
mock_kickoff.assert_has_calls(
|
||||
[mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
|
||||
)
|
||||
|
||||
crew_evaluator.assert_has_calls(
|
||||
[
|
||||
mock.call(crew, "gpt-4o-mini"),
|
||||
mock.call().set_iteration(1),
|
||||
mock.call().set_iteration(2),
|
||||
mock.call().print_crew_evaluation_result(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -5,13 +5,12 @@ import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
from pydantic_core import ValidationError
|
||||
|
||||
from crewai import Agent, Crew, Process, Task
|
||||
from crewai.tasks.conditional_task import ConditionalTask
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.converter import Converter
|
||||
from pydantic import BaseModel
|
||||
from pydantic_core import ValidationError
|
||||
|
||||
|
||||
def test_task_tool_reflect_agent_tools():
|
||||
@@ -110,7 +109,7 @@ def test_task_callback():
|
||||
task_completed.assert_called_once_with(task.output)
|
||||
|
||||
|
||||
def test_task_callback_returns_task_ouput():
|
||||
def test_task_callback_returns_task_output():
|
||||
from crewai.tasks.output_format import OutputFormat
|
||||
|
||||
researcher = Agent(
|
||||
|
||||
113
tests/utilities/evaluators/test_crew_evaluator_handler.py
Normal file
113
tests/utilities/evaluators/test_crew_evaluator_handler.py
Normal file
@@ -0,0 +1,113 @@
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.crew import Crew
|
||||
from crewai.task import Task
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.evaluators.crew_evaluator_handler import (
|
||||
CrewEvaluator,
|
||||
TaskEvaluationPydanticOutput,
|
||||
)
|
||||
|
||||
|
||||
class TestCrewEvaluator:
|
||||
@pytest.fixture
|
||||
def crew_planner(self):
|
||||
agent = Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1")
|
||||
task = Task(
|
||||
description="Task 1",
|
||||
expected_output="Output 1",
|
||||
agent=agent,
|
||||
)
|
||||
crew = Crew(agents=[agent], tasks=[task])
|
||||
|
||||
return CrewEvaluator(crew, openai_model_name="gpt-4o-mini")
|
||||
|
||||
def test_setup_for_evaluating(self, crew_planner):
|
||||
crew_planner._setup_for_evaluating()
|
||||
assert crew_planner.crew.tasks[0].callback == crew_planner.evaluate
|
||||
|
||||
def test_set_iteration(self, crew_planner):
|
||||
crew_planner.set_iteration(1)
|
||||
assert crew_planner.iteration == 1
|
||||
|
||||
def test_evaluator_agent(self, crew_planner):
|
||||
agent = crew_planner._evaluator_agent()
|
||||
assert agent.role == "Task Execution Evaluator"
|
||||
assert (
|
||||
agent.goal
|
||||
== "Your goal is to evaluate the performance of the agents in the crew based on the tasks they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
|
||||
)
|
||||
assert (
|
||||
agent.backstory
|
||||
== "Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed"
|
||||
)
|
||||
assert agent.verbose is False
|
||||
assert agent.llm.model_name == "gpt-4o-mini"
|
||||
|
||||
def test_evaluation_task(self, crew_planner):
|
||||
evaluator_agent = Agent(
|
||||
role="Evaluator Agent",
|
||||
goal="Evaluate the performance of the agents in the crew",
|
||||
backstory="Master in Evaluation",
|
||||
)
|
||||
task_to_evaluate = Task(
|
||||
description="Task 1",
|
||||
expected_output="Output 1",
|
||||
agent=Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1"),
|
||||
)
|
||||
task_output = "Task Output 1"
|
||||
task = crew_planner._evaluation_task(
|
||||
evaluator_agent, task_to_evaluate, task_output
|
||||
)
|
||||
|
||||
assert task.description.startswith(
|
||||
"Based on the task description and the expected output, compare and evaluate the performance of the agents in the crew based on the Task Output they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
|
||||
)
|
||||
|
||||
assert task.agent == evaluator_agent
|
||||
assert (
|
||||
task.description
|
||||
== "Based on the task description and the expected output, compare and evaluate "
|
||||
"the performance of the agents in the crew based on the Task Output they have "
|
||||
"performed using score from 1 to 10 evaluating on completion, quality, and overall "
|
||||
"performance.task_description: Task 1 task_expected_output: Output 1 "
|
||||
"agent: Agent 1 agent_goal: Goal 1 Task Output: Task Output 1"
|
||||
)
|
||||
|
||||
@mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Console")
|
||||
@mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Table")
|
||||
def test_print_crew_evaluation_result(self, table, console, crew_planner):
|
||||
crew_planner.tasks_scores = {
|
||||
1: [10, 9, 8],
|
||||
2: [9, 8, 7],
|
||||
}
|
||||
|
||||
crew_planner.print_crew_evaluation_result()
|
||||
|
||||
table.assert_has_calls(
|
||||
[
|
||||
mock.call(title="Tasks Scores \n (1-10 Higher is better)"),
|
||||
mock.call().add_column("Tasks/Crew"),
|
||||
mock.call().add_column("Run 1"),
|
||||
mock.call().add_column("Run 2"),
|
||||
mock.call().add_column("Avg. Total"),
|
||||
mock.call().add_row("Task 1", "10", "9", "9.5"),
|
||||
mock.call().add_row("Task 2", "9", "8", "8.5"),
|
||||
mock.call().add_row("Task 3", "8", "7", "7.5"),
|
||||
mock.call().add_row("Crew", "9.0", "8.0", "8.5"),
|
||||
]
|
||||
)
|
||||
console.assert_has_calls([mock.call(), mock.call().print(table())])
|
||||
|
||||
def test_evaluate(self, crew_planner):
|
||||
task_output = TaskOutput(
|
||||
description="Task 1", agent=str(crew_planner.crew.agents[0])
|
||||
)
|
||||
|
||||
with mock.patch.object(Task, "execute_sync") as execute:
|
||||
execute().pydantic = TaskEvaluationPydanticOutput(quality=9.5)
|
||||
crew_planner.evaluate(task_output)
|
||||
assert crew_planner.tasks_scores[0] == [9.5]
|
||||
@@ -56,8 +56,7 @@ def test_evaluate_training_data(converter_mock):
|
||||
"based on the human feedback\n",
|
||||
model=TrainingTaskEvaluation,
|
||||
instructions="I'm gonna convert this raw text into valid JSON.\n\nThe json should have the "
|
||||
"following structure, with the following keys:\n- suggestions: List[str]\n- "
|
||||
"quality: float\n- final_summary: str",
|
||||
"following structure, with the following keys:\n{\n suggestions: List[str],\n quality: float,\n final_summary: str\n}",
|
||||
),
|
||||
mock.call().to_pydantic(),
|
||||
]
|
||||
|
||||
266
tests/utilities/test_converter.py
Normal file
266
tests/utilities/test_converter.py
Normal file
@@ -0,0 +1,266 @@
|
||||
import json
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
from crewai.utilities.converter import (
|
||||
Converter,
|
||||
ConverterError,
|
||||
convert_to_model,
|
||||
convert_with_instructions,
|
||||
create_converter,
|
||||
get_conversion_instructions,
|
||||
handle_partial_json,
|
||||
is_gpt,
|
||||
validate_model,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Sample Pydantic models for testing
|
||||
class EmailResponse(BaseModel):
|
||||
previous_message_content: str
|
||||
|
||||
|
||||
class EmailResponses(BaseModel):
|
||||
responses: list[EmailResponse]
|
||||
|
||||
|
||||
class SimpleModel(BaseModel):
|
||||
name: str
|
||||
age: int
|
||||
|
||||
|
||||
class NestedModel(BaseModel):
|
||||
id: int
|
||||
data: SimpleModel
|
||||
|
||||
|
||||
# Fixtures
|
||||
@pytest.fixture
|
||||
def mock_agent():
|
||||
agent = Mock()
|
||||
agent.function_calling_llm = None
|
||||
agent.llm = Mock()
|
||||
return agent
|
||||
|
||||
|
||||
# Tests for convert_to_model
|
||||
def test_convert_to_model_with_valid_json():
|
||||
result = '{"name": "John", "age": 30}'
|
||||
output = convert_to_model(result, SimpleModel, None, None)
|
||||
assert isinstance(output, SimpleModel)
|
||||
assert output.name == "John"
|
||||
assert output.age == 30
|
||||
|
||||
|
||||
def test_convert_to_model_with_invalid_json():
|
||||
result = '{"name": "John", "age": "thirty"}'
|
||||
with patch("crewai.utilities.converter.handle_partial_json") as mock_handle:
|
||||
mock_handle.return_value = "Fallback result"
|
||||
output = convert_to_model(result, SimpleModel, None, None)
|
||||
assert output == "Fallback result"
|
||||
|
||||
|
||||
def test_convert_to_model_with_no_model():
|
||||
result = "Plain text"
|
||||
output = convert_to_model(result, None, None, None)
|
||||
assert output == "Plain text"
|
||||
|
||||
|
||||
def test_convert_to_model_with_special_characters():
|
||||
json_string_test = """
|
||||
{
|
||||
"responses": [
|
||||
{
|
||||
"previous_message_content": "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
output = convert_to_model(json_string_test, EmailResponses, None, None)
|
||||
assert isinstance(output, EmailResponses)
|
||||
assert len(output.responses) == 1
|
||||
assert (
|
||||
output.responses[0].previous_message_content
|
||||
== "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
|
||||
)
|
||||
|
||||
|
||||
def test_convert_to_model_with_escaped_special_characters():
|
||||
json_string_test = json.dumps(
|
||||
{
|
||||
"responses": [
|
||||
{
|
||||
"previous_message_content": "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
|
||||
}
|
||||
]
|
||||
}
|
||||
)
|
||||
output = convert_to_model(json_string_test, EmailResponses, None, None)
|
||||
assert isinstance(output, EmailResponses)
|
||||
assert len(output.responses) == 1
|
||||
assert (
|
||||
output.responses[0].previous_message_content
|
||||
== "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
|
||||
)
|
||||
|
||||
|
||||
def test_convert_to_model_with_multiple_special_characters():
|
||||
json_string_test = """
|
||||
{
|
||||
"responses": [
|
||||
{
|
||||
"previous_message_content": "Line 1\r\nLine 2\tTabbed\nLine 3\r\n\rEscaped newline"
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
output = convert_to_model(json_string_test, EmailResponses, None, None)
|
||||
assert isinstance(output, EmailResponses)
|
||||
assert len(output.responses) == 1
|
||||
assert (
|
||||
output.responses[0].previous_message_content
|
||||
== "Line 1\r\nLine 2\tTabbed\nLine 3\r\n\rEscaped newline"
|
||||
)
|
||||
|
||||
|
||||
# Tests for validate_model
|
||||
def test_validate_model_pydantic_output():
|
||||
result = '{"name": "Alice", "age": 25}'
|
||||
output = validate_model(result, SimpleModel, False)
|
||||
assert isinstance(output, SimpleModel)
|
||||
assert output.name == "Alice"
|
||||
assert output.age == 25
|
||||
|
||||
|
||||
def test_validate_model_json_output():
|
||||
result = '{"name": "Bob", "age": 40}'
|
||||
output = validate_model(result, SimpleModel, True)
|
||||
assert isinstance(output, dict)
|
||||
assert output == {"name": "Bob", "age": 40}
|
||||
|
||||
|
||||
# Tests for handle_partial_json
|
||||
def test_handle_partial_json_with_valid_partial():
|
||||
result = 'Some text {"name": "Charlie", "age": 35} more text'
|
||||
output = handle_partial_json(result, SimpleModel, False, None)
|
||||
assert isinstance(output, SimpleModel)
|
||||
assert output.name == "Charlie"
|
||||
assert output.age == 35
|
||||
|
||||
|
||||
def test_handle_partial_json_with_invalid_partial(mock_agent):
|
||||
result = "No valid JSON here"
|
||||
with patch("crewai.utilities.converter.convert_with_instructions") as mock_convert:
|
||||
mock_convert.return_value = "Converted result"
|
||||
output = handle_partial_json(result, SimpleModel, False, mock_agent)
|
||||
assert output == "Converted result"
|
||||
|
||||
|
||||
# Tests for convert_with_instructions
|
||||
@patch("crewai.utilities.converter.create_converter")
|
||||
@patch("crewai.utilities.converter.get_conversion_instructions")
|
||||
def test_convert_with_instructions_success(
|
||||
mock_get_instructions, mock_create_converter, mock_agent
|
||||
):
|
||||
mock_get_instructions.return_value = "Instructions"
|
||||
mock_converter = Mock()
|
||||
mock_converter.to_pydantic.return_value = SimpleModel(name="David", age=50)
|
||||
mock_create_converter.return_value = mock_converter
|
||||
|
||||
result = "Some text to convert"
|
||||
output = convert_with_instructions(result, SimpleModel, False, mock_agent)
|
||||
|
||||
assert isinstance(output, SimpleModel)
|
||||
assert output.name == "David"
|
||||
assert output.age == 50
|
||||
|
||||
|
||||
@patch("crewai.utilities.converter.create_converter")
|
||||
@patch("crewai.utilities.converter.get_conversion_instructions")
|
||||
def test_convert_with_instructions_failure(
|
||||
mock_get_instructions, mock_create_converter, mock_agent
|
||||
):
|
||||
mock_get_instructions.return_value = "Instructions"
|
||||
mock_converter = Mock()
|
||||
mock_converter.to_pydantic.return_value = ConverterError("Conversion failed")
|
||||
mock_create_converter.return_value = mock_converter
|
||||
|
||||
result = "Some text to convert"
|
||||
with patch("crewai.utilities.converter.Printer") as mock_printer:
|
||||
output = convert_with_instructions(result, SimpleModel, False, mock_agent)
|
||||
assert output == result
|
||||
mock_printer.return_value.print.assert_called_once()
|
||||
|
||||
|
||||
# Tests for get_conversion_instructions
|
||||
def test_get_conversion_instructions_gpt():
|
||||
mock_llm = Mock()
|
||||
mock_llm.openai_api_base = None
|
||||
with patch("crewai.utilities.converter.is_gpt", return_value=True):
|
||||
instructions = get_conversion_instructions(SimpleModel, mock_llm)
|
||||
assert instructions == "I'm gonna convert this raw text into valid JSON."
|
||||
|
||||
|
||||
def test_get_conversion_instructions_non_gpt():
|
||||
mock_llm = Mock()
|
||||
with patch("crewai.utilities.converter.is_gpt", return_value=False):
|
||||
with patch("crewai.utilities.converter.PydanticSchemaParser") as mock_parser:
|
||||
mock_parser.return_value.get_schema.return_value = "Sample schema"
|
||||
instructions = get_conversion_instructions(SimpleModel, mock_llm)
|
||||
assert "Sample schema" in instructions
|
||||
|
||||
|
||||
# Tests for is_gpt
|
||||
def test_is_gpt_true():
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
mock_llm = Mock(spec=ChatOpenAI)
|
||||
mock_llm.openai_api_base = None
|
||||
assert is_gpt(mock_llm) is True
|
||||
|
||||
|
||||
def test_is_gpt_false():
|
||||
mock_llm = Mock()
|
||||
assert is_gpt(mock_llm) is False
|
||||
|
||||
|
||||
class CustomConverter(Converter):
|
||||
pass
|
||||
|
||||
|
||||
def test_create_converter_with_mock_agent():
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.get_output_converter.return_value = MagicMock(spec=Converter)
|
||||
|
||||
converter = create_converter(
|
||||
agent=mock_agent,
|
||||
llm=Mock(),
|
||||
text="Sample",
|
||||
model=SimpleModel,
|
||||
instructions="Convert",
|
||||
)
|
||||
|
||||
assert isinstance(converter, Converter)
|
||||
mock_agent.get_output_converter.assert_called_once()
|
||||
|
||||
|
||||
def test_create_converter_with_custom_converter():
|
||||
converter = create_converter(
|
||||
converter_cls=CustomConverter,
|
||||
llm=Mock(),
|
||||
text="Sample",
|
||||
model=SimpleModel,
|
||||
instructions="Convert",
|
||||
)
|
||||
|
||||
assert isinstance(converter, CustomConverter)
|
||||
|
||||
|
||||
def test_create_converter_fails_without_agent_or_converter_cls():
|
||||
with pytest.raises(
|
||||
ValueError, match="Either agent or converter_cls must be provided"
|
||||
):
|
||||
create_converter(
|
||||
llm=Mock(), text="Sample", model=SimpleModel, instructions="Convert"
|
||||
)
|
||||
@@ -1,10 +1,11 @@
|
||||
from unittest.mock import patch
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
|
||||
import pytest
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.task import Task
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.planning_handler import CrewPlanner, PlannerTaskPydanticOutput
|
||||
|
||||
|
||||
@@ -28,7 +29,19 @@ class TestCrewPlanner:
|
||||
agent=Agent(role="Agent 3", goal="Goal 3", backstory="Backstory 3"),
|
||||
),
|
||||
]
|
||||
return CrewPlanner(tasks)
|
||||
return CrewPlanner(tasks, None)
|
||||
|
||||
@pytest.fixture
|
||||
def crew_planner_different_llm(self):
|
||||
tasks = [
|
||||
Task(
|
||||
description="Task 1",
|
||||
expected_output="Output 1",
|
||||
agent=Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1"),
|
||||
)
|
||||
]
|
||||
planning_agent_llm = ChatOpenAI(model="gpt-3.5-turbo")
|
||||
return CrewPlanner(tasks, planning_agent_llm)
|
||||
|
||||
def test_handle_crew_planning(self, crew_planner):
|
||||
with patch.object(Task, "execute_sync") as execute:
|
||||
@@ -40,7 +53,7 @@ class TestCrewPlanner:
|
||||
),
|
||||
)
|
||||
result = crew_planner._handle_crew_planning()
|
||||
|
||||
assert crew_planner.planning_agent_llm.model_name == "gpt-4o-mini"
|
||||
assert isinstance(result, PlannerTaskPydanticOutput)
|
||||
assert len(result.list_of_plans_per_task) == len(crew_planner.tasks)
|
||||
execute.assert_called_once()
|
||||
@@ -72,3 +85,22 @@ class TestCrewPlanner:
|
||||
assert isinstance(tasks_summary, str)
|
||||
assert tasks_summary.startswith("\n Task Number 1 - Task 1")
|
||||
assert tasks_summary.endswith('"agent_tools": []\n ')
|
||||
|
||||
def test_handle_crew_planning_different_llm(self, crew_planner_different_llm):
|
||||
with patch.object(Task, "execute_sync") as execute:
|
||||
execute.return_value = TaskOutput(
|
||||
description="Description",
|
||||
agent="agent",
|
||||
pydantic=PlannerTaskPydanticOutput(list_of_plans_per_task=["Plan 1"]),
|
||||
)
|
||||
result = crew_planner_different_llm._handle_crew_planning()
|
||||
|
||||
assert (
|
||||
crew_planner_different_llm.planning_agent_llm.model_name
|
||||
== "gpt-3.5-turbo"
|
||||
)
|
||||
assert isinstance(result, PlannerTaskPydanticOutput)
|
||||
assert len(result.list_of_plans_per_task) == len(
|
||||
crew_planner_different_llm.tasks
|
||||
)
|
||||
execute.assert_called_once()
|
||||
|
||||
Reference in New Issue
Block a user