Merge in main to bugfix/kickoff-for-each-usage-metrics

Brandon Hancock
2024-07-01 14:00:13 -04:00
parent 1d2827e9a5
commit 2efe16eac9
54 changed files with 411517 additions and 6465 deletions

View File

@@ -1,5 +1,6 @@
"""Test Agent creation and execution basic functionality."""
from unittest import mock
from unittest.mock import patch
import pytest
@@ -11,6 +12,7 @@ from crewai import Agent, Crew, Task
from crewai.agents.cache import CacheHandler
from crewai.agents.executor import CrewAgentExecutor
from crewai.agents.parser import CrewAgentParser
from crewai.tools.tool_calling import InstructorToolCalling
from crewai.tools.tool_usage import ToolUsage
from crewai.utilities import RPMController
@@ -842,3 +844,54 @@ Thought:
"""
)
@patch("crewai.agent.CrewTrainingHandler")
def test_agent_training_handler(crew_training_handler):
task_prompt = "What is 1 + 1?"
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
verbose=True,
)
crew_training_handler().load.return_value = {
f"{str(agent.id)}": {"0": {"human_feedback": "good"}}
}
result = agent._training_handler(task_prompt=task_prompt)
assert result == "What is 1 + 1?You MUST follow these feedbacks: \n good"
crew_training_handler.assert_has_calls(
[mock.call(), mock.call("training_data.pkl"), mock.call().load()]
)
@patch("crewai.agent.CrewTrainingHandler")
def test_agent_use_trained_data(crew_training_handler):
task_prompt = "What is 1 + 1?"
agent = Agent(
role="researcher",
goal="test goal",
backstory="test backstory",
verbose=True,
)
crew_training_handler().load.return_value = {
agent.role: {
"suggestions": [
"The result of the math operatio must be right.",
"Result must be better than 1.",
]
}
}
result = agent._use_trained_data(task_prompt=task_prompt)
assert (
result == "What is 1 + 1?You MUST follow these feedbacks: \n "
"The result of the math operatio must be right.\n - Result must be better than 1."
)
crew_training_handler.assert_has_calls(
[mock.call(), mock.call("trained_agents_data.pkl"), mock.call().load()]
)
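
Taken together, these two tests pin down the prompt rewriting they mock: _training_handler appends the human feedback stored under the agent's id in training_data.pkl, while _use_trained_data appends the suggestions stored under the agent's role in trained_agents_data.pkl. A minimal stand-alone sketch of that implied behavior (free functions standing in for the real Agent methods; the dict shapes mirror the mocked load() return values):

from typing import Dict, List


def training_handler(task_prompt: str, agent_id: str, data: Dict[str, dict]) -> str:
    # Sketch of Agent._training_handler: append stored human feedback, if any.
    agent_data = data.get(agent_id)
    if not agent_data:
        return task_prompt
    feedback = [entry["human_feedback"] for entry in agent_data.values()]
    return task_prompt + "You MUST follow these feedbacks: \n " + "\n - ".join(feedback)


def use_trained_data(task_prompt: str, role: str, data: Dict[str, Dict[str, List[str]]]) -> str:
    # Sketch of Agent._use_trained_data: append stored suggestions for this role, if any.
    role_data = data.get(role)
    if not role_data:
        return task_prompt
    return task_prompt + "You MUST follow these feedbacks: \n " + "\n - ".join(role_data["suggestions"])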

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,6 +1,8 @@
"""Test Agent creation and execution basic functionality."""
import json
from unittest import mock
from unittest.mock import patch
import pydantic_core
import pytest
@@ -155,9 +157,9 @@ def test_hierarchical_process():
manager_llm=ChatOpenAI(temperature=0, model="gpt-4"),
tasks=[task],
)
+    result = crew.kickoff()
    assert (
-        crew.kickoff()
+        result
== "1. 'Demystifying AI: An in-depth exploration of Artificial Intelligence for the layperson' - In this piece, we will unravel the enigma of AI, simplifying its complexities into digestible information for the everyday individual. By using relatable examples and analogies, we will journey through the neural networks and machine learning algorithms that define AI, without the jargon and convoluted explanations that often accompany such topics.\n\n2. 'The Role of AI in Startups: A Game Changer?' - Startups today are harnessing the power of AI to revolutionize their businesses. This article will delve into how AI, as an innovative force, is shaping the startup ecosystem, transforming everything from customer service to product development. We'll explore real-life case studies of startups that have leveraged AI to accelerate their growth and disrupt their respective industries.\n\n3. 'AI and Ethics: Navigating the Complex Landscape' - AI brings with it not just technological advancements, but ethical dilemmas as well. This article will engage readers in a thought-provoking discussion on the ethical implications of AI, exploring issues like bias in algorithms, privacy concerns, job displacement, and the moral responsibility of AI developers. We will also discuss potential solutions and frameworks to address these challenges.\n\n4. 'Unveiling the AI Agents: The Future of Customer Service' - AI agents are poised to reshape the customer service landscape, offering businesses the ability to provide round-the-clock support and personalized experiences. In this article, we'll dive deep into the world of AI agents, examining how they work, their benefits and limitations, and how they're set to redefine customer interactions in the digital age.\n\n5. 'From Science Fiction to Reality: AI in Everyday Life' - AI, once a concept limited to the realm of sci-fi, has now permeated our daily lives. This article will highlight the ubiquitous presence of AI, from voice assistants and recommendation algorithms, to autonomous vehicles and smart homes. We'll explore how AI, in its various forms, is transforming our everyday experiences, making the future seem a lot closer than we imagined."
)
@@ -381,14 +383,15 @@ def test_crew_full_ouput():
crew = Crew(agents=[agent], tasks=[task1, task2], full_output=True)
result = crew.kickoff()
assert result == {
"final_output": "Hello! It is a delight to receive your message. I trust this response finds you in good spirits. It's indeed a pleasure to connect with you too.",
"final_output": "Hello!",
"tasks_outputs": [task1.output, task2.output],
"usage_metrics": {
"completion_tokens": 109,
"prompt_tokens": 330,
"successful_requests": 2,
"total_tokens": 439,
"total_tokens": 517,
"prompt_tokens": 466,
"completion_tokens": 51,
"successful_requests": 3,
},
}
assert False
@@ -934,6 +937,30 @@ def test_task_with_no_arguments():
assert result == "75"
def test_code_execution_flag_adds_code_tool_upon_kickoff():
from crewai_tools import CodeInterpreterTool
programmer = Agent(
role="Programmer",
goal="Write code to solve problems.",
backstory="You're a programmer who loves to solve problems with code.",
allow_delegation=False,
allow_code_execution=True,
)
task = Task(
description="How much is 2 + 2?",
expected_output="The result of the sum as an integer.",
agent=programmer,
)
crew = Crew(agents=[programmer], tasks=[task])
crew.kickoff()
assert len(programmer.tools) == 1
assert programmer.tools[0].__class__ == CodeInterpreterTool
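
The two assertions above imply that kickoff injects the interpreter tool into the agent's tool list when allow_code_execution is set. A hedged sketch of that wiring (the helper name is an assumption; only CodeInterpreterTool comes from the test, and in crewAI the real hook lives somewhere inside Crew.kickoff()):

def add_code_tool_if_enabled(agent) -> None:
    # Hypothetical helper; sketches the injection the test asserts.
    from crewai_tools import CodeInterpreterTool

    if getattr(agent, "allow_code_execution", False):
        tools = list(agent.tools or [])
        if not any(isinstance(tool, CodeInterpreterTool) for tool in tools):
            agent.tools = tools + [CodeInterpreterTool()]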
@pytest.mark.vcr(filter_headers=["authorization"])
def test_delegation_is_not_enabled_if_there_are_only_one_agent():
from unittest.mock import patch
@@ -951,7 +978,6 @@ def test_delegation_is_not_enabled_if_there_are_only_one_agent():
)
crew = Crew(agents=[researcher], tasks=[task])
with patch.object(Task, "execute") as execute:
execute.return_value = "ok"
crew.kickoff()
@@ -1019,15 +1045,15 @@ def test_agent_usage_metrics_are_captured_for_hierarchical_process():
agents=[agent],
tasks=[task],
process=Process.hierarchical,
-        manager_llm=ChatOpenAI(temperature=0, model="gpt-4"),
+        manager_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
)
result = crew.kickoff()
assert result == '"Howdy!"'
print(crew.usage_metrics)
assert crew.usage_metrics == {
"total_tokens": 1659,
"prompt_tokens": 1376,
"total_tokens": 1616,
"prompt_tokens": 1333,
"completion_tokens": 283,
"successful_requests": 3,
}
@@ -1343,7 +1369,10 @@ def test_manager_agent_with_tools_raises_exception():
crew.kickoff()
-def test_crew_train_success():
+@patch("crewai.crew.Crew.kickoff")
+@patch("crewai.crew.CrewTrainingHandler")
+@patch("crewai.crew.TaskEvaluator")
+def test_crew_train_success(task_evaluator, crew_training_handler, kickoff):
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
expected_output="5 bullet points with a paragraph for each idea.",
@@ -1353,8 +1382,48 @@ def test_crew_train_success():
agents=[researcher, writer],
tasks=[task],
)
+    crew.train(n_iterations=2, inputs={"topic": "AI"})
task_evaluator.assert_has_calls(
[
mock.call(researcher),
mock.call().evaluate_training_data(
training_data=crew_training_handler().load(),
agent_id=str(researcher.id),
),
mock.call().evaluate_training_data().model_dump(),
mock.call(writer),
mock.call().evaluate_training_data(
training_data=crew_training_handler().load(),
agent_id=str(writer.id),
),
mock.call().evaluate_training_data().model_dump(),
]
)
-    crew.train(n_iterations=2)
crew_training_handler.assert_has_calls(
[
mock.call("training_data.pkl"),
mock.call().load(),
mock.call("trained_agents_data.pkl"),
mock.call().save_trained_data(
agent_id="Researcher",
trained_data=task_evaluator().evaluate_training_data().model_dump(),
),
mock.call("trained_agents_data.pkl"),
mock.call().save_trained_data(
agent_id="Senior Writer",
trained_data=task_evaluator().evaluate_training_data().model_dump(),
),
mock.call(),
mock.call().load(),
mock.call(),
mock.call().load(),
]
)
kickoff.assert_has_calls(
[mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
)
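
Read in order, the three assert_has_calls blocks trace the train() flow this commit expects: run kickoff once per iteration with the provided inputs, load the collected feedback from training_data.pkl, evaluate it per agent, and save each evaluation to trained_agents_data.pkl keyed by the agent's role. A sketch of that implied control flow (a free function under those assumptions, not the actual Crew.train body):

from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities.training_handler import CrewTrainingHandler


def train(crew, n_iterations: int, inputs: dict) -> None:
    crew._setup_for_training()

    # Run the crew repeatedly; human feedback is collected along the way.
    for _ in range(n_iterations):
        crew.kickoff(inputs=inputs)

    # Evaluate each agent's collected data and persist the result by role.
    training_data = CrewTrainingHandler("training_data.pkl").load()
    trained = CrewTrainingHandler("trained_agents_data.pkl")
    for agent in crew.agents:
        evaluation = TaskEvaluator(agent).evaluate_training_data(
            training_data=training_data, agent_id=str(agent.id)
        )
        trained.save_trained_data(
            agent_id=agent.role, trained_data=evaluation.model_dump()
        )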
def test_crew_train_error():
@@ -1373,3 +1442,32 @@ def test_crew_train_error():
assert "train() missing 1 required positional argument: 'n_iterations'" in str(
e
)
def test__setup_for_training():
researcher.allow_delegation = True
writer.allow_delegation = True
agents = [researcher, writer]
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article",
expected_output="5 bullet points with a paragraph for each idea.",
)
crew = Crew(
agents=agents,
tasks=[task],
)
assert crew._train is False
assert task.human_input is False
for agent in agents:
assert agent.allow_delegation is True
crew._setup_for_training()
assert crew._train is True
assert task.human_input is True
for agent in agents:
assert agent.allow_delegation is False
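
The assertions above fully specify _setup_for_training: enable the crew's training flag, require human input on every task, and disable delegation on every agent so feedback stays attributable. A direct sketch of the implied method:

def _setup_for_training(self) -> None:
    # Sketch of Crew._setup_for_training as pinned down by the test above.
    self._train = True
    for task in self.tasks:
        task.human_input = True  # collect feedback on every task
    for agent in self.agents:
        agent.allow_delegation = False  # keep outputs attributable per agent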

View File

@@ -1,7 +1,6 @@
"""Test Agent creation and execution basic functionality."""
import json
from unittest.mock import MagicMock, patch
import pytest

View File

@@ -0,0 +1,64 @@
from unittest import mock
from unittest.mock import MagicMock, patch
from crewai.utilities.evaluators.task_evaluator import (
TaskEvaluator,
TrainingTaskEvaluation,
)
@patch("crewai.utilities.evaluators.task_evaluator.Converter")
def test_evaluate_training_data(converter_mock):
training_data = {
"agent_id": {
"data1": {
"initial_output": "Initial output 1",
"human_feedback": "Human feedback 1",
"improved_output": "Improved output 1",
},
"data2": {
"initial_output": "Initial output 2",
"human_feedback": "Human feedback 2",
"improved_output": "Improved output 2",
},
}
}
agent_id = "agent_id"
original_agent = MagicMock()
function_return_value = TrainingTaskEvaluation(
suggestions=[
"The initial output was already good, having a detailed explanation. However, the improved output "
"gave similar information but in a more professional manner using better vocabulary. For future tasks, "
"try to implement more elaborate language and precise terminology from the beginning."
],
quality=8.0,
final_summary="The agent responded well initially. However, the improved output showed that there is room "
"for enhancement in terms of language usage, precision, and professionalism. For future tasks, the agent "
"should focus more on these points from the start to increase performance.",
)
converter_mock.return_value.to_pydantic.return_value = function_return_value
result = TaskEvaluator(original_agent=original_agent).evaluate_training_data(
training_data, agent_id
)
assert result == function_return_value
converter_mock.assert_has_calls(
[
mock.call(
llm=original_agent.llm,
text="Assess the quality of the training data based on the llm output, human feedback , and llm "
"output improved result.\n\nInitial Output:\nInitial output 1\n\nHuman Feedback:\nHuman feedback "
"1\n\nImproved Output:\nImproved output 1\n\nInitial Output:\nInitial output 2\n\nHuman "
"Feedback:\nHuman feedback 2\n\nImproved Output:\nImproved output 2\n\nPlease provide:\n- "
"Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs "
"provide action items based on human_feedback for future tasks\n- A score from 0 to 10 evaluating "
"on completion, quality, and overall performance from the improved output to the initial output "
"based on the human feedback\n",
model=TrainingTaskEvaluation,
instructions="I'm gonna convert this raw text into valid JSON.\n\nThe json should have the "
"following structure, with the following keys:\n- suggestions: List[str]\n- "
"quality: float\n- final_summary: str",
),
mock.call().to_pydantic(),
]
)
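
The expected Converter call fixes the evaluator's shape: flatten each recorded iteration into an Initial Output / Human Feedback / Improved Output block, then have a Converter coerce the LLM's answer into a TrainingTaskEvaluation. A hedged sketch (prompt and instruction strings abbreviated, since the exact wording is in the mock.call above; the Converter import path is an assumption):

from crewai.utilities.converter import Converter  # assumed import path
from crewai.utilities.evaluators.task_evaluator import TrainingTaskEvaluation


def evaluate_training_data(llm, training_data: dict, agent_id: str) -> TrainingTaskEvaluation:
    # Flatten every recorded iteration for this agent into one text block.
    text = "Assess the quality of the training data based on the llm output, ...\n\n"
    for entry in training_data[agent_id].values():
        text += (
            f"Initial Output:\n{entry['initial_output']}\n\n"
            f"Human Feedback:\n{entry['human_feedback']}\n\n"
            f"Improved Output:\n{entry['improved_output']}\n\n"
        )
    text += "Please provide:\n- ...\n"  # full request text is quoted in the assertion above

    # Ask the LLM for a structured TrainingTaskEvaluation.
    converter = Converter(
        llm=llm,
        text=text,
        model=TrainingTaskEvaluation,
        instructions="...",  # JSON schema description, quoted in full above
    )
    return converter.to_pydantic()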

View File

@@ -0,0 +1,41 @@
import os
import unittest
import pytest
from crewai.utilities.file_handler import PickleHandler
class TestPickleHandler(unittest.TestCase):
def setUp(self):
self.file_name = "test_data.pkl"
self.file_path = os.path.join(os.getcwd(), self.file_name)
self.handler = PickleHandler(self.file_name)
def tearDown(self):
if os.path.exists(self.file_path):
os.remove(self.file_path)
def test_initialize_file(self):
assert os.path.exists(self.file_path) is True
assert os.path.getsize(self.file_path) >= 0
def test_save_and_load(self):
data = {"key": "value"}
self.handler.save(data)
loaded_data = self.handler.load()
assert loaded_data == data
def test_load_empty_file(self):
loaded_data = self.handler.load()
assert loaded_data == {}
def test_load_corrupted_file(self):
with open(self.file_path, "wb") as file:
file.write(b"corrupted data")
with pytest.raises(Exception) as exc:
self.handler.load()
assert str(exc.value) == "pickle data was truncated"
assert "<class '_pickle.UnpicklingError'>" == str(exc.type)

View File

@@ -0,0 +1,42 @@
import os
import unittest
from crewai.utilities.training_handler import CrewTrainingHandler
class TestCrewTrainingHandler(unittest.TestCase):
def setUp(self):
self.handler = CrewTrainingHandler("trained_data.pkl")
def tearDown(self):
os.remove("trained_data.pkl")
del self.handler
def test_save_trained_data(self):
agent_id = "agent1"
trained_data = {"param1": 1, "param2": 2}
self.handler.save_trained_data(agent_id, trained_data)
# Assert that the trained data is saved correctly
data = self.handler.load()
assert data[agent_id] == trained_data
def test_append_existing_agent(self):
train_iteration = 1
agent_id = "agent1"
new_data = {"param3": 3, "param4": 4}
self.handler.append(train_iteration, agent_id, new_data)
# Assert that the new data is appended correctly to the existing agent
data = self.handler.load()
assert data[agent_id][train_iteration] == new_data
def test_append_new_agent(self):
train_iteration = 1
agent_id = "agent2"
new_data = {"param5": 5, "param6": 6}
self.handler.append(train_iteration, agent_id, new_data)
# Assert that the new agent and data are appended correctly
data = self.handler.load()
assert data[agent_id][train_iteration] == new_data
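
The append tests imply a nested layout: agent ids map to per-iteration dicts. A sketch of append built on the load/save behavior exercised above (a method body under those assumptions):

def append(self, train_iteration: int, agent_id: str, new_data: dict) -> None:
    # Sketch of CrewTrainingHandler.append as implied by both append tests.
    data = self.load()  # {} when nothing has been saved yet
    data.setdefault(agent_id, {})[train_iteration] = new_data
    self.save(data)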