Compare commits

...

6 Commits

Author SHA1 Message Date
Lorenze Jay
319f0301ef WIP fixed mypy src types 2024-07-30 08:32:59 -07:00
Eduardo Chiarotti
d824db82a3 feat: Add execution time to both task and testing feature (#1031)
* feat: Add execution time to both task and testing feature

* feat: Remove unused functions

* feat: change test_crew to evaluate_crew to avoid issues with testing libs

* feat: fix tests
2024-07-29 23:17:07 -03:00
Matt Young
de6b597eff telemetry.py - fix typo in comment. (#1020) 2024-07-29 23:03:51 -03:00
Deepak Tammali
6111d05219 docs: Fix crewai-tools package name typo in getting-started docs (#1026) 2024-07-29 23:03:32 -03:00
Monarch Wadia
f83c91d612 Fixed package name typo in pip install command (#1029)
Changed `pip install crewi-tools` to `pip install crewai-tools`
2024-07-29 23:02:48 -03:00
Mackensie Alvarez
c8f360414e Update Start-a-New-CrewAI-Project-Template-Method.md (#1030) 2024-07-29 23:02:18 -03:00
14 changed files with 91 additions and 52 deletions

View File

@@ -17,7 +17,7 @@ Before we start there are a couple of things to note:
Before getting started with CrewAI, make sure that you have installed it via pip:
```shell
$ pip install crewai crewi-tools
$ pip install crewai crewai-tools
```
### Virtual Environments

View File

@@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Optional
from crewai.memory.entity.entity_memory_item import EntityMemoryItem
from crewai.memory.long_term.long_term_memory_item import LongTermMemoryItem
from crewai.memory.short_term.short_term_memory_item import ShortTermMemoryItem
from crewai.utilities.converter import ConverterError
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities import I18N
@@ -39,18 +38,17 @@ class CrewAgentExecutorMixin:
and "Action: Delegate work to coworker" not in output.log
):
try:
memory = ShortTermMemoryItem(
data=output.log,
agent=self.crew_agent.role,
metadata={
"observation": self.task.description,
},
)
if (
hasattr(self.crew, "_short_term_memory")
and self.crew._short_term_memory
):
self.crew._short_term_memory.save(memory)
self.crew._short_term_memory.save(
value=output.log,
metadata={
"observation": self.task.description,
},
agent=self.crew_agent.role,
)
except Exception as e:
print(f"Failed to add to short term memory: {e}")
pass

View File

@@ -6,9 +6,9 @@ from crewai.memory.storage.kickoff_task_outputs_storage import (
)
from .create_crew import create_crew
from .evaluate_crew import evaluate_crew
from .replay_from_task import replay_task_command
from .reset_memories_command import reset_memories_command
from .test_crew import test_crew
from .train_crew import train_crew
@@ -144,7 +144,7 @@ def reset_memories(long, short, entities, kickoff_outputs, all):
def test(n_iterations: int, model: str):
"""Test the crew and evaluate the results."""
click.echo(f"Testing the crew for {n_iterations} iterations with model {model}")
test_crew(n_iterations, model)
evaluate_crew(n_iterations, model)
if __name__ == "__main__":
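
The `test` CLI command now delegates to `evaluate_crew` rather than `test_crew`. A minimal sketch of exercising the command with click's test runner, mirroring the CLI tests further down in this diff; the `from crewai.cli.cli import test` import path is an assumption inferred from the `mock.patch` targets in those tests.
```python
from unittest import mock

from click.testing import CliRunner

# Import path assumed from the mock.patch targets in the CLI tests below.
from crewai.cli.cli import test

runner = CliRunner()
with mock.patch("crewai.cli.cli.evaluate_crew") as evaluate_crew:
    result = runner.invoke(test, ["--n_iterations", "5", "--model", "gpt-4o"])
    evaluate_crew.assert_called_once_with(5, "gpt-4o")  # the command forwards to evaluate_crew
    assert result.exit_code == 0
```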

View File

@@ -1,13 +1,11 @@
import subprocess
import click
import pytest
pytest.skip(allow_module_level=True)
def test_crew(n_iterations: int, model: str) -> None:
def evaluate_crew(n_iterations: int, model: str) -> None:
"""
Test the crew by running a command in the Poetry environment.
Test and Evaluate the crew by running a command in the Poetry environment.
Args:
n_iterations (int): The number of iterations to test the crew.
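
For context, a simplified sketch of what the renamed `evaluate_crew` appears to do, based on the subprocess call and error message asserted in the tests at the bottom of this diff; the real function's validation, error handling, and extra `subprocess.run` keyword arguments are omitted.
```python
import subprocess

import click


def evaluate_crew_sketch(n_iterations: int, model: str) -> None:
    """Illustration only; the real evaluate_crew adds error handling and more subprocess options."""
    if n_iterations <= 0:
        # Message taken from the expectations in the evaluate_crew tests below.
        click.echo(
            "An unexpected error occurred: The number of iterations must be a positive integer.",
            err=True,
        )
        return
    # Run the project's `test` script inside the Poetry environment.
    subprocess.run(["poetry", "run", "test", str(n_iterations), model])
```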

View File

@@ -1,3 +1,4 @@
from typing import Any, Dict, Optional
from crewai.memory.memory import Memory
from crewai.memory.short_term.short_term_memory_item import ShortTermMemoryItem
from crewai.memory.storage.rag_storage import RAGStorage
@@ -18,7 +19,14 @@ class ShortTermMemory(Memory):
)
super().__init__(storage)
def save(self, item: ShortTermMemoryItem) -> None:
def save(
self,
value: Any,
metadata: Optional[Dict[str, Any]] = None,
agent: Optional[str] = None,
) -> None:
item = ShortTermMemoryItem(data=value, metadata=metadata, agent=agent)
super().save(value=item.data, metadata=item.metadata, agent=item.agent)
def search(self, query: str, score_threshold: float = 0.35):
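
The new `save` signature accepts the raw value plus optional metadata and agent and builds the `ShortTermMemoryItem` internally. A minimal usage sketch, assuming an already-constructed `ShortTermMemory` instance; the import path and the `remember` helper are illustrative, not part of this diff, and the search result shape follows the memory test near the end of this diff.
```python
# Import path assumed; `remember` is an illustrative helper, not part of the diff.
from crewai.memory.short_term.short_term_memory import ShortTermMemory


def remember(stm: ShortTermMemory) -> None:
    # Old style: stm.save(ShortTermMemoryItem(data=..., agent=..., metadata=...))
    # New style: pass the pieces and let save() build the item internally.
    stm.save(
        value="test value",
        metadata={"task": "test_task"},
        agent="test_agent",
    )
    hit = stm.search("test value", score_threshold=0.01)[0]
    print(hit["context"])  # "test value", matching the memory test near the end of this diff
```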

View File

@@ -3,7 +3,10 @@ from typing import Any, Dict, Optional
class ShortTermMemoryItem:
def __init__(
self, data: Any, agent: str, metadata: Optional[Dict[str, Any]] = None
self,
data: Any,
agent: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
):
self.data = data
self.agent = agent

View File

@@ -4,7 +4,7 @@ from typing import Any, Dict
class Storage:
"""Abstract base class defining the storage interface"""
def save(self, key: str, value: Any, metadata: Dict[str, Any]) -> None:
def save(self, value: Any, metadata: Dict[str, Any]) -> None:
pass
def search(self, key: str) -> Dict[str, Any]: # type: ignore
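
Since the abstract `save` no longer takes a `key`, a custom backend only has to accept the value and its metadata. A hypothetical in-memory backend sketching the updated interface (class name and behaviour are illustrative, not part of this diff):
```python
from typing import Any, Dict, List


class InMemoryStorage:
    """Toy backend mirroring the Storage interface above: list-backed save and naive search."""

    def __init__(self) -> None:
        self.items: List[Dict[str, Any]] = []

    def save(self, value: Any, metadata: Dict[str, Any]) -> None:
        # No `key` parameter anymore; the value itself is the payload.
        self.items.append({"context": value, "metadata": metadata})

    def search(self, key: str) -> Dict[str, Any]:
        # Return the first stored item whose value contains the query, else an empty dict.
        return next((item for item in self.items if key in str(item["context"])), {})
```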

View File

@@ -1,3 +1,4 @@
import datetime
import json
import os
import threading
@@ -107,6 +108,7 @@ class Task(BaseModel):
_original_description: str | None = None
_original_expected_output: str | None = None
_thread: threading.Thread | None = None
_execution_time: float | None = None
def __init__(__pydantic_self__, **data):
config = data.pop("config", {})
@@ -120,6 +122,12 @@ class Task(BaseModel):
"may_not_set_field", "This field is not to be set by the user.", {}
)
def _set_start_execution_time(self) -> float:
return datetime.datetime.now().timestamp()
def _set_end_execution_time(self, start_time: float) -> None:
self._execution_time = datetime.datetime.now().timestamp() - start_time
@field_validator("output_file")
@classmethod
def output_file_validation(cls, value: str) -> str:
@@ -216,6 +224,7 @@ class Task(BaseModel):
f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
)
start_time = self._set_start_execution_time()
self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)
self.prompt_context = context
@@ -239,6 +248,7 @@ class Task(BaseModel):
)
self.output = task_output
self._set_end_execution_time(start_time)
if self.callback:
self.callback(self.output)
@@ -250,7 +260,9 @@ class Task(BaseModel):
content = (
json_output
if json_output
else pydantic_output.model_dump_json() if pydantic_output else result
else pydantic_output.model_dump_json()
if pydantic_output
else result
)
self._save_file(content)
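
The timing added above is a pair of `datetime` timestamps taken around task execution. A stripped-down sketch of that pattern with hypothetical names (`TimedTask`, `run`) standing in for the real `Task` flow:
```python
import datetime
import time


class TimedTask:
    """Illustration only: records wall-clock execution time like Task._execution_time."""

    def __init__(self) -> None:
        self._execution_time: float | None = None

    def _set_start_execution_time(self) -> float:
        return datetime.datetime.now().timestamp()

    def _set_end_execution_time(self, start_time: float) -> None:
        self._execution_time = datetime.datetime.now().timestamp() - start_time

    def run(self) -> None:
        start_time = self._set_start_execution_time()
        time.sleep(0.1)  # stand-in for the agent actually executing the task
        self._set_end_execution_time(start_time)


task = TimedTask()
task.run()
print(f"{task._execution_time:.2f}s")  # roughly 0.10s
```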

View File

@@ -40,7 +40,7 @@ class Telemetry:
- Roles of agents in a crew
- Tools names available
Users can opt-in to sharing more complete data suing the `share_crew`
Users can opt-in to sharing more complete data using the `share_crew`
attribute in the Crew class.
"""

View File

@@ -28,6 +28,7 @@ class CrewEvaluator:
"""
tasks_scores: defaultdict = defaultdict(list)
run_execution_times: defaultdict = defaultdict(list)
iteration: int = 0
def __init__(self, crew, openai_model_name: str):
@@ -40,9 +41,6 @@ class CrewEvaluator:
for task in self.crew.tasks:
task.callback = self.evaluate
def set_iteration(self, iteration: int) -> None:
self.iteration = iteration
def _evaluator_agent(self):
return Agent(
role="Task Execution Evaluator",
@@ -71,6 +69,9 @@ class CrewEvaluator:
output_pydantic=TaskEvaluationPydanticOutput,
)
def set_iteration(self, iteration: int) -> None:
self.iteration = iteration
def print_crew_evaluation_result(self) -> None:
"""
Prints the evaluation result of the crew in a table.
@@ -119,6 +120,16 @@ class CrewEvaluator:
]
table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
run_exec_times = [
int(sum(tasks_exec_times))
for _, tasks_exec_times in self.run_execution_times.items()
]
execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))
table.add_row(
"Execution Time (s)",
*map(str, run_exec_times),
f"{execution_time_avg}",
)
# Display the table in the terminal
console = Console()
console.print(table)
@@ -145,5 +156,8 @@ class CrewEvaluator:
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
self.run_execution_times[self.iteration].append(
current_task._execution_time
)
else:
raise ValueError("Evaluation result is not in the expected format")
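
The new row sums each run's per-task execution times and averages across runs. Reproducing that arithmetic with the fixture values used in the evaluator test at the end of this diff:
```python
# Per-iteration task execution times, as in the CrewEvaluator test fixture below.
run_execution_times = {
    1: [24, 45, 66],  # run 1 -> 135 s
    2: [55, 33, 67],  # run 2 -> 155 s
}

run_exec_times = [int(sum(times)) for times in run_execution_times.values()]
execution_time_avg = int(sum(run_exec_times) / len(run_exec_times))

print(run_exec_times, execution_time_avg)  # [135, 155] 145 -> the "Execution Time (s)" row
```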

View File

@@ -135,29 +135,29 @@ def test_version_command_with_tools(runner):
)
@mock.patch("crewai.cli.cli.test_crew")
def test_test_default_iterations(test_crew, runner):
@mock.patch("crewai.cli.cli.evaluate_crew")
def test_test_default_iterations(evaluate_crew, runner):
result = runner.invoke(test)
test_crew.assert_called_once_with(3, "gpt-4o-mini")
evaluate_crew.assert_called_once_with(3, "gpt-4o-mini")
assert result.exit_code == 0
assert "Testing the crew for 3 iterations with model gpt-4o-mini" in result.output
@mock.patch("crewai.cli.cli.test_crew")
def test_test_custom_iterations(test_crew, runner):
@mock.patch("crewai.cli.cli.evaluate_crew")
def test_test_custom_iterations(evaluate_crew, runner):
result = runner.invoke(test, ["--n_iterations", "5", "--model", "gpt-4o"])
test_crew.assert_called_once_with(5, "gpt-4o")
evaluate_crew.assert_called_once_with(5, "gpt-4o")
assert result.exit_code == 0
assert "Testing the crew for 5 iterations with model gpt-4o" in result.output
@mock.patch("crewai.cli.cli.test_crew")
def test_test_invalid_string_iterations(test_crew, runner):
@mock.patch("crewai.cli.cli.evaluate_crew")
def test_test_invalid_string_iterations(evaluate_crew, runner):
result = runner.invoke(test, ["--n_iterations", "invalid"])
test_crew.assert_not_called()
evaluate_crew.assert_not_called()
assert result.exit_code == 2
assert (
"Usage: test [OPTIONS]\nTry 'test --help' for help.\n\nError: Invalid value for '-n' / '--n_iterations': 'invalid' is not a valid integer.\n"

View File

@@ -3,7 +3,7 @@ from unittest import mock
import pytest
from crewai.cli import test_crew
from crewai.cli import evaluate_crew
@pytest.mark.parametrize(
@@ -14,13 +14,13 @@ from crewai.cli import test_crew
(10, "gpt-4"),
],
)
@mock.patch("crewai.cli.test_crew.subprocess.run")
@mock.patch("crewai.cli.evaluate_crew.subprocess.run")
def test_crew_success(mock_subprocess_run, n_iterations, model):
"""Test the crew function for successful execution."""
mock_subprocess_run.return_value = subprocess.CompletedProcess(
args=f"poetry run test {n_iterations} {model}", returncode=0
)
result = test_crew.test_crew(n_iterations, model)
result = evaluate_crew.evaluate_crew(n_iterations, model)
mock_subprocess_run.assert_called_once_with(
["poetry", "run", "test", str(n_iterations), model],
@@ -31,26 +31,26 @@ def test_crew_success(mock_subprocess_run, n_iterations, model):
assert result is None
@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.evaluate_crew.click")
def test_test_crew_zero_iterations(click):
test_crew.test_crew(0, "gpt-4o")
evaluate_crew.evaluate_crew(0, "gpt-4o")
click.echo.assert_called_once_with(
"An unexpected error occurred: The number of iterations must be a positive integer.",
err=True,
)
@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.evaluate_crew.click")
def test_test_crew_negative_iterations(click):
test_crew.test_crew(-2, "gpt-4o")
evaluate_crew.evaluate_crew(-2, "gpt-4o")
click.echo.assert_called_once_with(
"An unexpected error occurred: The number of iterations must be a positive integer.",
err=True,
)
@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.test_crew.subprocess.run")
@mock.patch("crewai.cli.evaluate_crew.click")
@mock.patch("crewai.cli.evaluate_crew.subprocess.run")
def test_test_crew_called_process_error(mock_subprocess_run, click):
n_iterations = 5
mock_subprocess_run.side_effect = subprocess.CalledProcessError(
@@ -59,7 +59,7 @@ def test_test_crew_called_process_error(mock_subprocess_run, click):
output="Error",
stderr="Some error occurred",
)
test_crew.test_crew(n_iterations, "gpt-4o")
evaluate_crew.evaluate_crew(n_iterations, "gpt-4o")
mock_subprocess_run.assert_called_once_with(
["poetry", "run", "test", "5", "gpt-4o"],
@@ -78,13 +78,13 @@ def test_test_crew_called_process_error(mock_subprocess_run, click):
)
@mock.patch("crewai.cli.test_crew.click")
@mock.patch("crewai.cli.test_crew.subprocess.run")
@mock.patch("crewai.cli.evaluate_crew.click")
@mock.patch("crewai.cli.evaluate_crew.subprocess.run")
def test_test_crew_unexpected_exception(mock_subprocess_run, click):
# Arrange
n_iterations = 5
mock_subprocess_run.side_effect = Exception("Unexpected error")
test_crew.test_crew(n_iterations, "gpt-4o")
evaluate_crew.evaluate_crew(n_iterations, "gpt-4o")
mock_subprocess_run.assert_called_once_with(
["poetry", "run", "test", "5", "gpt-4o"],

View File

@@ -23,10 +23,7 @@ def short_term_memory():
expected_output="A list of relevant URLs based on the search query.",
agent=agent,
)
return ShortTermMemory(crew=Crew(
agents=[agent],
tasks=[task]
))
return ShortTermMemory(crew=Crew(agents=[agent], tasks=[task]))
@pytest.mark.vcr(filter_headers=["authorization"])
@@ -38,7 +35,11 @@ def test_save_and_search(short_term_memory):
agent="test_agent",
metadata={"task": "test_task"},
)
short_term_memory.save(memory)
short_term_memory.save(
value=memory.data,
metadata=memory.metadata,
agent=memory.agent,
)
find = short_term_memory.search("test value", score_threshold=0.01)[0]
assert find["context"] == memory.data, "Data value mismatch."

View File

@@ -84,6 +84,10 @@ class TestCrewEvaluator:
1: [10, 9, 8],
2: [9, 8, 7],
}
crew_planner.run_execution_times = {
1: [24, 45, 66],
2: [55, 33, 67],
}
crew_planner.print_crew_evaluation_result()
@@ -98,6 +102,7 @@ class TestCrewEvaluator:
mock.call().add_row("Task 2", "9", "8", "8.5"),
mock.call().add_row("Task 3", "8", "7", "7.5"),
mock.call().add_row("Crew", "9.0", "8.0", "8.5"),
mock.call().add_row("Execution Time (s)", "135", "155", "145"),
]
)
console.assert_has_calls([mock.call(), mock.call().print(table())])