Merge in main to bugfix/kickoff-for-each-usage-metrics

2026-05-06 09:42:39 +00:00 · 2024-07-01 14:00:13 -04:00
parent 1d2827e9a5
commit 2efe16eac9
54 changed files with 411517 additions and 6465 deletions
--- a/tests/utilities/evaluators/test_task_evaluator.py
+++ b/tests/utilities/evaluators/test_task_evaluator.py
@@ -0,0 +1,64 @@
+from unittest import mock
+from unittest.mock import MagicMock, patch
+
+from crewai.utilities.evaluators.task_evaluator import (
+    TaskEvaluator,
+    TrainingTaskEvaluation,
+)
+
+
+@patch("crewai.utilities.evaluators.task_evaluator.Converter")
+def test_evaluate_training_data(converter_mock):
+    training_data = {
+        "agent_id": {
+            "data1": {
+                "initial_output": "Initial output 1",
+                "human_feedback": "Human feedback 1",
+                "improved_output": "Improved output 1",
+            },
+            "data2": {
+                "initial_output": "Initial output 2",
+                "human_feedback": "Human feedback 2",
+                "improved_output": "Improved output 2",
+            },
+        }
+    }
+    agent_id = "agent_id"
+    original_agent = MagicMock()
+    function_return_value = TrainingTaskEvaluation(
+        suggestions=[
+            "The initial output was already good, having a detailed explanation. However, the improved output "
+            "gave similar information but in a more professional manner using better vocabulary. For future tasks, "
+            "try to implement more elaborate language and precise terminology from the beginning."
+        ],
+        quality=8.0,
+        final_summary="The agent responded well initially. However, the improved output showed that there is room "
+        "for enhancement in terms of language usage, precision, and professionalism. For future tasks, the agent "
+        "should focus more on these points from the start to increase performance.",
+    )
+    converter_mock.return_value.to_pydantic.return_value = function_return_value
+    result = TaskEvaluator(original_agent=original_agent).evaluate_training_data(
+        training_data, agent_id
+    )
+
+    assert result == function_return_value
+    converter_mock.assert_has_calls(
+        [
+            mock.call(
+                llm=original_agent.llm,
+                text="Assess the quality of the training data based on the llm output, human feedback , and llm "
+                "output improved result.\n\nInitial Output:\nInitial output 1\n\nHuman Feedback:\nHuman feedback "
+                "1\n\nImproved Output:\nImproved output 1\n\nInitial Output:\nInitial output 2\n\nHuman "
+                "Feedback:\nHuman feedback 2\n\nImproved Output:\nImproved output 2\n\nPlease provide:\n- "
+                "Based on the Human Feedbacks and the comparison between Initial Outputs and Improved outputs "
+                "provide action items based on human_feedback for future tasks\n- A score from 0 to 10 evaluating "
+                "on completion, quality, and overall performance from the improved output to the initial output "
+                "based on the human feedback\n",
+                model=TrainingTaskEvaluation,
+                instructions="I'm gonna convert this raw text into valid JSON.\n\nThe json should have the "
+                "following structure, with the following keys:\n- suggestions: List[str]\n- "
+                "quality: float\n- final_summary: str",
+            ),
+            mock.call().to_pydantic(),
+        ]
+    )
--- a/tests/utilities/test_file_handler.py
+++ b/tests/utilities/test_file_handler.py
@@ -0,0 +1,41 @@
+import os
+import unittest
+
+import pytest
+
+from crewai.utilities.file_handler import PickleHandler
+
+
+class TestPickleHandler(unittest.TestCase):
+    def setUp(self):
+        self.file_name = "test_data.pkl"
+        self.file_path = os.path.join(os.getcwd(), self.file_name)
+        self.handler = PickleHandler(self.file_name)
+
+    def tearDown(self):
+        if os.path.exists(self.file_path):
+            os.remove(self.file_path)
+
+    def test_initialize_file(self):
+        assert os.path.exists(self.file_path) is True
+        assert os.path.getsize(self.file_path) >= 0
+
+    def test_save_and_load(self):
+        data = {"key": "value"}
+        self.handler.save(data)
+        loaded_data = self.handler.load()
+        assert loaded_data == data
+
+    def test_load_empty_file(self):
+        loaded_data = self.handler.load()
+        assert loaded_data == {}
+
+    def test_load_corrupted_file(self):
+        with open(self.file_path, "wb") as file:
+            file.write(b"corrupted data")
+
+        with pytest.raises(Exception) as exc:
+            self.handler.load()
+
+        assert str(exc.value) == "pickle data was truncated"
+        assert "<class '_pickle.UnpicklingError'>" == str(exc.type)
--- a/tests/utilities/test_training_handler.py
+++ b/tests/utilities/test_training_handler.py
@@ -0,0 +1,42 @@
+import os
+import unittest
+
+from crewai.utilities.training_handler import CrewTrainingHandler
+
+
+class TestCrewTrainingHandler(unittest.TestCase):
+    def setUp(self):
+        self.handler = CrewTrainingHandler("trained_data.pkl")
+
+    def tearDown(self):
+        os.remove("trained_data.pkl")
+        del self.handler
+
+    def test_save_trained_data(self):
+        agent_id = "agent1"
+        trained_data = {"param1": 1, "param2": 2}
+        self.handler.save_trained_data(agent_id, trained_data)
+
+        # Assert that the trained data is saved correctly
+        data = self.handler.load()
+        assert data[agent_id] == trained_data
+
+    def test_append_existing_agent(self):
+        train_iteration = 1
+        agent_id = "agent1"
+        new_data = {"param3": 3, "param4": 4}
+        self.handler.append(train_iteration, agent_id, new_data)
+
+        # Assert that the new data is appended correctly to the existing agent
+        data = self.handler.load()
+        assert data[agent_id][train_iteration] == new_data
+
+    def test_append_new_agent(self):
+        train_iteration = 1
+        agent_id = "agent2"
+        new_data = {"param5": 5, "param6": 6}
+        self.handler.append(train_iteration, agent_id, new_data)
+
+        # Assert that the new agent and data are appended correctly
+        data = self.handler.load()
+        assert data[agent_id][train_iteration] == new_data