Merge in main to bugfix/kickoff-for-each-usage-metrics

Brandon Hancock
2024-07-01 14:00:13 -04:00
parent 1d2827e9a5
commit 2efe16eac9
54 changed files with 411517 additions and 6465 deletions

View File

@@ -1,5 +1,6 @@
"""Test Agent creation and execution basic functionality."""
from unittest import mock
from unittest.mock import patch
import pytest
@@ -11,6 +12,7 @@ from crewai import Agent, Crew, Task
from crewai.agents.cache import CacheHandler
from crewai.agents.executor import CrewAgentExecutor
from crewai.agents.parser import CrewAgentParser
from crewai.tools.tool_calling import InstructorToolCalling
from crewai.tools.tool_usage import ToolUsage
from crewai.utilities import RPMController
@@ -842,3 +844,54 @@ Thought:
"""
)
@patch("crewai.agent.CrewTrainingHandler")
def test_agent_training_handler(crew_training_handler):
task_prompt = "What is 1 + 1?"
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
verbose=True,
)
crew_training_handler().load.return_value = {
f"{str(agent.id)}": {"0": {"human_feedback": "good"}}
}
result = agent._training_handler(task_prompt=task_prompt)
assert result == "What is 1 + 1?You MUST follow these feedbacks: \n good"
crew_training_handler.assert_has_calls(
[mock.call(), mock.call("training_data.pkl"), mock.call().load()]
)
@patch("crewai.agent.CrewTrainingHandler")
def test_agent_use_trained_data(crew_training_handler):
task_prompt = "What is 1 + 1?"
agent = Agent(
role="researcher",
goal="test goal",
backstory="test backstory",
verbose=True,
)
crew_training_handler().load.return_value = {
agent.role: {
"suggestions": [
"The result of the math operatio must be right.",
"Result must be better than 1.",
]
}
}
result = agent._use_trained_data(task_prompt=task_prompt)
assert (
result == "What is 1 + 1?You MUST follow these feedbacks: \n "
"The result of the math operatio must be right.\n - Result must be better than 1."
)
crew_training_handler.assert_has_calls(
[mock.call(), mock.call("trained_agents_data.pkl"), mock.call().load()]
)
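
Taken together, these two tests pin down the prompt rewriting they mock: _training_handler appends the human feedback stored under the agent's id in training_data.pkl, while _use_trained_data appends the suggestions stored under the agent's role in trained_agents_data.pkl. A minimal stand-alone sketch of that implied behavior (free functions standing in for the real Agent methods; the dict shapes mirror the mocked load() return values):

from typing import Dict, List


def training_handler(task_prompt: str, agent_id: str, data: Dict[str, dict]) -> str:
    # Sketch of Agent._training_handler: append stored human feedback, if any.
    agent_data = data.get(agent_id)
    if not agent_data:
        return task_prompt
    feedback = [entry["human_feedback"] for entry in agent_data.values()]
    return task_prompt + "You MUST follow these feedbacks: \n " + "\n - ".join(feedback)


def use_trained_data(task_prompt: str, role: str, data: Dict[str, Dict[str, List[str]]]) -> str:
    # Sketch of Agent._use_trained_data: append stored suggestions for this role, if any.
    role_data = data.get(role)
    if not role_data:
        return task_prompt
    return task_prompt + "You MUST follow these feedbacks: \n " + "\n - ".join(role_data["suggestions"])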

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,6 +1,8 @@
"""Test Agent creation and execution basic functionality."""
import json
from unittest import mock
from unittest.mock import patch
import pydantic_core
import pytest
@@ -155,9 +157,9 @@ def test_hierarchical_process():
manager_llm=ChatOpenAI(temperature=0, model="gpt-4"),
tasks=[task],
)
+    result = crew.kickoff()
    assert (
-        crew.kickoff()
+        result
== "1. 'Demystifying AI: An in-depth exploration of Artificial Intelligence for the layperson' - In this piece, we will unravel the enigma of AI, simplifying its complexities into digestible information for the everyday individual. By using relatable examples and analogies, we will journey through the neural networks and machine learning algorithms that define AI, without the jargon and convoluted explanations that often accompany such topics.\n\n2. 'The Role of AI in Startups: A Game Changer?' - Startups today are harnessing the power of AI to revolutionize their businesses. This article will delve into how AI, as an innovative force, is shaping the startup ecosystem, transforming everything from customer service to product development. We'll explore real-life case studies of startups that have leveraged AI to accelerate their growth and disrupt their respective industries.\n\n3. 'AI and Ethics: Navigating the Complex Landscape' - AI brings with it not just technological advancements, but ethical dilemmas as well. This article will engage readers in a thought-provoking discussion on the ethical implications of AI, exploring issues like bias in algorithms, privacy concerns, job displacement, and the moral responsibility of AI developers. We will also discuss potential solutions and frameworks to address these challenges.\n\n4. 'Unveiling the AI Agents: The Future of Customer Service' - AI agents are poised to reshape the customer service landscape, offering businesses the ability to provide round-the-clock support and personalized experiences. In this article, we'll dive deep into the world of AI agents, examining how they work, their benefits and limitations, and how they're set to redefine customer interactions in the digital age.\n\n5. 'From Science Fiction to Reality: AI in Everyday Life' - AI, once a concept limited to the realm of sci-fi, has now permeated our daily lives. This article will highlight the ubiquitous presence of AI, from voice assistants and recommendation algorithms, to autonomous vehicles and smart homes. We'll explore how AI, in its various forms, is transforming our everyday experiences, making the future seem a lot closer than we imagined."
)
@@ -381,14 +383,15 @@ def test_crew_full_ouput():
crew = Crew(agents=[agent], tasks=[task1, task2], full_output=True)
result = crew.kickoff()
assert result == {
"final_output": "Hello! It is a delight to receive your message. I trust this response finds you in good spirits. It's indeed a pleasure to connect with you too.",
"final_output": "Hello!",
"tasks_outputs": [task1.output, task2.output],
"usage_metrics": {
"completion_tokens": 109,
"prompt_tokens": 330,
"successful_requests": 2,
"total_tokens": 439,
"total_tokens": 517,
"prompt_tokens": 466,
"completion_tokens": 51,
"successful_requests": 3,
},
}
assert False
@@ -934,6 +937,30 @@ def test_task_with_no_arguments():
assert result == "75"
def test_code_execution_flag_adds_code_tool_upon_kickoff():
from crewai_tools import CodeInterpreterTool
programmer = Agent(
role="Programmer",
goal="Write code to solve problems.",
backstory="You're a programmer who loves to solve problems with code.",
allow_delegation=False,
allow_code_execution=True,
)
task = Task(
description="How much is 2 + 2?",
expected_output="The result of the sum as an integer.",
agent=programmer,
)
crew = Crew(agents=[programmer], tasks=[task])
crew.kickoff()
assert len(programmer.tools) == 1
assert programmer.tools[0].__class__ == CodeInterpreterTool
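
The two assertions above imply that kickoff injects the interpreter tool into the agent's tool list when allow_code_execution is set. A hedged sketch of that wiring (the helper name is an assumption; only CodeInterpreterTool comes from the test, and in crewAI the real hook lives somewhere inside Crew.kickoff()):

def add_code_tool_if_enabled(agent) -> None:
    # Hypothetical helper; sketches the injection the test asserts.
    from crewai_tools import CodeInterpreterTool

    if getattr(agent, "allow_code_execution", False):
        tools = list(agent.tools or [])
        if not any(isinstance(tool, CodeInterpreterTool) for tool in tools):
            agent.tools = tools + [CodeInterpreterTool()]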
@pytest.mark.vcr(filter_headers=["authorization"])
def test_delegation_is_not_enabled_if_there_are_only_one_agent():
from unittest.mock import patch
@@ -951,7 +978,6 @@ def test_delegation_is_not_enabled_if_there_are_only_one_agent():
)
crew = Crew(agents=[researcher], tasks=[task])
with patch.object(Task, "execute") as execute:
execute.return_value = "ok"
crew.kickoff()
@@ -1019,15 +1045,15 @@ def test_agent_usage_metrics_are_captured_for_hierarchical_process():
agents=[agent],
tasks=[task],
process=Process.hierarchical,
-        manager_llm=ChatOpenAI(temperature=0, model="gpt-4"),
+        manager_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
)
result = crew.kickoff()
assert result == '"Howdy!"'
print(crew.usage_metrics)
assert crew.usage_metrics == {
"total_tokens": 1659,
"prompt_tokens": 1376,
"total_tokens": 1616,
"prompt_tokens": 1333,
"completion_tokens": 283,
"successful_requests": 3,
}
@@ -1343,7 +1369,10 @@ def test_manager_agent_with_tools_raises_exception():
crew.kickoff()
-def test_crew_train_success():
+@patch("crewai.crew.Crew.kickoff")
+@patch("crewai.crew.CrewTrainingHandler")
+@patch("crewai.crew.TaskEvaluator")
+def test_crew_train_success(task_evaluator, crew_training_handler, kickoff):
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
expected_output="5 bullet points with a paragraph for each idea.",
@@ -1353,8 +1382,48 @@ def test_crew_train_success():
agents=[researcher, writer],
tasks=[task],
)
+    crew.train(n_iterations=2, inputs={"topic": "AI"})
task_evaluator.assert_has_calls(
[
mock.call(researcher),
mock.call().evaluate_training_data(
training_data=crew_training_handler().load(),
agent_id=str(researcher.id),
),
mock.call().evaluate_training_data().model_dump(),
mock.call(writer),
mock.call().evaluate_training_data(
training_data=crew_training_handler().load(),
agent_id=str(writer.id),
),
mock.call().evaluate_training_data().model_dump(),
]
)
-    crew.train(n_iterations=2)
crew_training_handler.assert_has_calls(
[
mock.call("training_data.pkl"),
mock.call().load(),
mock.call("trained_agents_data.pkl"),
mock.call().save_trained_data(
agent_id="Researcher",
trained_data=task_evaluator().evaluate_training_data().model_dump(),
),
mock.call("trained_agents_data.pkl"),
mock.call().save_trained_data(
agent_id="Senior Writer",
trained_data=task_evaluator().evaluate_training_data().model_dump(),
),
mock.call(),
mock.call().load(),
mock.call(),
mock.call().load(),
]
)
kickoff.assert_has_calls(
[mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
)
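
Read in order, the three assert_has_calls blocks trace the train() flow this commit expects: run kickoff once per iteration with the provided inputs, load the collected feedback from training_data.pkl, evaluate it per agent, and save each evaluation to trained_agents_data.pkl keyed by the agent's role. A sketch of that implied control flow (a free function under those assumptions, not the actual Crew.train body):

from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities.training_handler import CrewTrainingHandler


def train(crew, n_iterations: int, inputs: dict) -> None:
    crew._setup_for_training()

    # Run the crew repeatedly; human feedback is collected along the way.
    for _ in range(n_iterations):
        crew.kickoff(inputs=inputs)

    # Evaluate each agent's collected data and persist the result by role.
    training_data = CrewTrainingHandler("training_data.pkl").load()
    trained = CrewTrainingHandler("trained_agents_data.pkl")
    for agent in crew.agents:
        evaluation = TaskEvaluator(agent).evaluate_training_data(
            training_data=training_data, agent_id=str(agent.id)
        )
        trained.save_trained_data(
            agent_id=agent.role, trained_data=evaluation.model_dump()
        )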
def test_crew_train_error():
@@ -1373,3 +1442,32 @@ def test_crew_train_error():
assert "train() missing 1 required positional argument: 'n_iterations'" in str(
e
)
def test__setup_for_training():
researcher.allow_delegation = True
writer.allow_delegation = True
agents = [researcher, writer]
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article",
expected_output="5 bullet points with a paragraph for each idea.",
)
crew = Crew(
agents=agents,
tasks=[task],
)
assert crew._train is False
assert task.human_input is False
for agent in agents:
assert agent.allow_delegation is True
crew._setup_for_training()
assert crew._train is True
assert task.human_input is True
for agent in agents:
assert agent.allow_delegation is False
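
The assertions above fully specify _setup_for_training: enable the crew's training flag, require human input on every task, and disable delegation on every agent so feedback stays attributable. A direct sketch of the implied method:

def _setup_for_training(self) -> None:
    # Sketch of Crew._setup_for_training as pinned down by the test above.
    self._train = True
    for task in self.tasks:
        task.human_input = True  # collect feedback on every task
    for agent in self.agents:
        agent.allow_delegation = False  # keep outputs attributable per agent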

View File

@@ -1,7 +1,6 @@
"""Test Agent creation and execution basic functionality."""
import json
from unittest.mock import MagicMock, patch
import pytest

View File

@@ -0,0 +1,64 @@
from unittest import mock
from unittest.mock import MagicMock, patch
from crewai.utilities.evaluators.task_evaluator import (
TaskEvaluator,
TrainingTaskEvaluation,
)
@patch("crewai.utilities.evaluators.task_evaluator.Converter")
def test_evaluate_training_data(converter_mock):
training_data = {
"agent_id": {
"data1": {
"initial_output": "Initial output 1",
"human_feedback": "Human feedback 1",
"improved_output": "Improved output 1",
},
"data2": {
"initial_output": "Initial output 2",
"human_feedback": "Human feedback 2",
"improved_output": "Improved output 2",
},
}
}
agent_id = "agent_id"
original_agent = MagicMock()
function_return_value = TrainingTaskEvaluation(
suggestions=[
"The initial output was already good, having a detailed explanation. However, the improved output "
"gave similar information but in a more professional manner using better vocabulary. For future tasks, "
"try to implement more elaborate language and precise terminology from the beginning."
],
quality=8.0,
final_summary="The agent responded well initially. However, the improved output showed that there is room "
"for enhancement in terms of language usage, precision, and professionalism. For future tasks, the agent "
"should focus more on these points from the start to increase performance.",
)
converter_mock.return_value.to_pydantic.return_value = function_return_value
result = TaskEvaluator(original_agent=original_agent).evaluate_training_data(
training_data, agent_id
)
assert result == function_return_value
converter_mock.assert_has_calls(
[
mock.call(
llm=original_agent.llm,
text="Assess the quality of the training data based on the llm output, human feedback , and llm "
"output improved result.\n\nInitial Output:\nInitial output 1\n\nHuman Feedback:\nHuman feedback "
"1\n\nImproved Output:\nImproved output 1\n\nInitial Output:\nInitial output 2\n\nHuman "
"Feedback:\nHuman feedback 2\n\nImproved Output:\nImproved output 2\n\nPlease provide:\n- "
"Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs "
"provide action items based on human_feedback for future tasks\n- A score from 0 to 10 evaluating "
"on completion, quality, and overall performance from the improved output to the initial output "
"based on the human feedback\n",
model=TrainingTaskEvaluation,
instructions="I'm gonna convert this raw text into valid JSON.\n\nThe json should have the "
"following structure, with the following keys:\n- suggestions: List[str]\n- "
"quality: float\n- final_summary: str",
),
mock.call().to_pydantic(),
]
)
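
The expected Converter call fixes the evaluator's shape: flatten each recorded iteration into an Initial Output / Human Feedback / Improved Output block, then have a Converter coerce the LLM's answer into a TrainingTaskEvaluation. A hedged sketch (prompt and instruction strings abbreviated, since the exact wording is in the mock.call above; the Converter import path is an assumption):

from crewai.utilities.converter import Converter  # assumed import path
from crewai.utilities.evaluators.task_evaluator import TrainingTaskEvaluation


def evaluate_training_data(llm, training_data: dict, agent_id: str) -> TrainingTaskEvaluation:
    # Flatten every recorded iteration for this agent into one text block.
    text = "Assess the quality of the training data based on the llm output, ...\n\n"
    for entry in training_data[agent_id].values():
        text += (
            f"Initial Output:\n{entry['initial_output']}\n\n"
            f"Human Feedback:\n{entry['human_feedback']}\n\n"
            f"Improved Output:\n{entry['improved_output']}\n\n"
        )
    text += "Please provide:\n- ...\n"  # full request text is quoted in the assertion above

    # Ask the LLM for a structured TrainingTaskEvaluation.
    converter = Converter(
        llm=llm,
        text=text,
        model=TrainingTaskEvaluation,
        instructions="...",  # JSON schema description, quoted in full above
    )
    return converter.to_pydantic()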

View File

@@ -0,0 +1,41 @@
import os
import unittest
import pytest
from crewai.utilities.file_handler import PickleHandler
class TestPickleHandler(unittest.TestCase):
def setUp(self):
self.file_name = "test_data.pkl"
self.file_path = os.path.join(os.getcwd(), self.file_name)
self.handler = PickleHandler(self.file_name)
def tearDown(self):
if os.path.exists(self.file_path):
os.remove(self.file_path)
def test_initialize_file(self):
assert os.path.exists(self.file_path) is True
assert os.path.getsize(self.file_path) >= 0
def test_save_and_load(self):
data = {"key": "value"}
self.handler.save(data)
loaded_data = self.handler.load()
assert loaded_data == data
def test_load_empty_file(self):
loaded_data = self.handler.load()
assert loaded_data == {}
def test_load_corrupted_file(self):
with open(self.file_path, "wb") as file:
file.write(b"corrupted data")
with pytest.raises(Exception) as exc:
self.handler.load()
assert str(exc.value) == "pickle data was truncated"
assert "<class '_pickle.UnpicklingError'>" == str(exc.type)

View File

@@ -0,0 +1,42 @@
import os
import unittest
from crewai.utilities.training_handler import CrewTrainingHandler
class TestCrewTrainingHandler(unittest.TestCase):
def setUp(self):
self.handler = CrewTrainingHandler("trained_data.pkl")
def tearDown(self):
os.remove("trained_data.pkl")
del self.handler
def test_save_trained_data(self):
agent_id = "agent1"
trained_data = {"param1": 1, "param2": 2}
self.handler.save_trained_data(agent_id, trained_data)
# Assert that the trained data is saved correctly
data = self.handler.load()
assert data[agent_id] == trained_data
def test_append_existing_agent(self):
train_iteration = 1
agent_id = "agent1"
new_data = {"param3": 3, "param4": 4}
self.handler.append(train_iteration, agent_id, new_data)
# Assert that the new data is appended correctly to the existing agent
data = self.handler.load()
assert data[agent_id][train_iteration] == new_data
def test_append_new_agent(self):
train_iteration = 1
agent_id = "agent2"
new_data = {"param5": 5, "param6": 6}
self.handler.append(train_iteration, agent_id, new_data)
# Assert that the new agent and data are appended correctly
data = self.handler.load()
assert data[agent_id][train_iteration] == new_data
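
The append tests imply a nested layout: agent ids map to per-iteration dicts. A sketch of append built on the load/save behavior exercised above (a method body under those assumptions):

def append(self, train_iteration: int, agent_id: str, new_data: dict) -> None:
    # Sketch of CrewTrainingHandler.append as implied by both append tests.
    data = self.load()  # {} when nothing has been saved yet
    data.setdefault(agent_id, {})[train_iteration] = new_data
    self.save(data)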