[ENG-227] Record task execution timestamps

2025-12-16 12:28:30 +00:00 · 2025-01-03 14:54:29 -03:00
4 changed files with 252 additions and 50 deletions
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -127,38 +127,41 @@ class Task(BaseModel):
    processed_by_agents: Set[str] = Field(default_factory=set)
    guardrail: Optional[Callable[[TaskOutput], Tuple[bool, Any]]] = Field(
        default=None,
-        description="Function to validate task output before proceeding to next task"
+        description="Function to validate task output before proceeding to next task",
    )
    max_retries: int = Field(
-        default=3,
-        description="Maximum number of retries when guardrail fails"
+        default=3, description="Maximum number of retries when guardrail fails"
    )
-    retry_count: int = Field(
-        default=0,
-        description="Current number of retries"
+    retry_count: int = Field(default=0, description="Current number of retries")
+
+    start_time: Optional[datetime.datetime] = Field(
+        default=None, description="Start time of the task execution"
+    )
+    end_time: Optional[datetime.datetime] = Field(
+        default=None, description="End time of the task execution"
    )

    @field_validator("guardrail")
    @classmethod
    def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]:
        """Validate that the guardrail function has the correct signature and behavior.
-        
+
        While type hints provide static checking, this validator ensures runtime safety by:
        1. Verifying the function accepts exactly one parameter (the TaskOutput)
        2. Checking return type annotations match Tuple[bool, Any] if present
        3. Providing clear, immediate error messages for debugging
-        
+
        This runtime validation is crucial because:
        - Type hints are optional and can be ignored at runtime
        - Function signatures need immediate validation before task execution
        - Clear error messages help users debug guardrail implementation issues
-        
+
        Args:
            v: The guardrail function to validate
-            
+
        Returns:
            The validated guardrail function
-            
+
        Raises:
            ValueError: If the function signature is invalid or return annotation
                       doesn't match Tuple[bool, Any]
@@ -171,8 +174,13 @@ class Task(BaseModel):
            # Check return annotation if present, but don't require it
            return_annotation = sig.return_annotation
            if return_annotation != inspect.Signature.empty:
-                if not (return_annotation == Tuple[bool, Any] or str(return_annotation) == 'Tuple[bool, Any]'):
-                    raise ValueError("If return type is annotated, it must be Tuple[bool, Any]")
+                if not (
+                    return_annotation == Tuple[bool, Any]
+                    or str(return_annotation) == "Tuple[bool, Any]"
+                ):
+                    raise ValueError(
+                        "If return type is annotated, it must be Tuple[bool, Any]"
+                    )
        return v

    _telemetry: Telemetry = PrivateAttr(default_factory=Telemetry)
@@ -181,7 +189,6 @@ class Task(BaseModel):
    _original_expected_output: Optional[str] = PrivateAttr(default=None)
    _original_output_file: Optional[str] = PrivateAttr(default=None)
    _thread: Optional[threading.Thread] = PrivateAttr(default=None)
-    _execution_time: Optional[float] = PrivateAttr(default=None)

    @model_validator(mode="before")
    @classmethod
@@ -206,25 +213,19 @@ class Task(BaseModel):
                "may_not_set_field", "This field is not to be set by the user.", {}
            )

-    def _set_start_execution_time(self) -> float:
-        return datetime.datetime.now().timestamp()
-
-    def _set_end_execution_time(self, start_time: float) -> None:
-        self._execution_time = datetime.datetime.now().timestamp() - start_time
-
    @field_validator("output_file")
    @classmethod
    def output_file_validation(cls, value: Optional[str]) -> Optional[str]:
        """Validate the output file path.
-        
+
        Args:
            value: The output file path to validate. Can be None or a string.
                  If the path contains template variables (e.g. {var}), leading slashes are preserved.
                  For regular paths, leading slashes are stripped.
-        
+
        Returns:
            The validated and potentially modified path, or None if no path was provided.
-            
+
        Raises:
            ValueError: If the path contains invalid characters, path traversal attempts,
                      or other security concerns.
@@ -234,18 +235,24 @@ class Task(BaseModel):

        # Basic security checks
        if ".." in value:
-            raise ValueError("Path traversal attempts are not allowed in output_file paths")
-        
+            raise ValueError(
+                "Path traversal attempts are not allowed in output_file paths"
+            )
+
        # Check for shell expansion first
-        if value.startswith('~') or value.startswith('$'):
-            raise ValueError("Shell expansion characters are not allowed in output_file paths")
-            
+        if value.startswith("~") or value.startswith("$"):
+            raise ValueError(
+                "Shell expansion characters are not allowed in output_file paths"
+            )
+
        # Then check other shell special characters
-        if any(char in value for char in ['|', '>', '<', '&', ';']):
-            raise ValueError("Shell special characters are not allowed in output_file paths")
+        if any(char in value for char in ["|", ">", "<", "&", ";"]):
+            raise ValueError(
+                "Shell special characters are not allowed in output_file paths"
+            )

        # Don't strip leading slash if it's a template path with variables
-        if "{" in value or "}" in value: 
+        if "{" in value or "}" in value:
            # Validate template variable format
            template_vars = [part.split("}")[0] for part in value.split("{")[1:]]
            for var in template_vars:
@@ -302,6 +309,12 @@ class Task(BaseModel):

        return md5("|".join(source).encode(), usedforsecurity=False).hexdigest()

+    @property
+    def execution_duration(self) -> float | None:
+        if not self.start_time or not self.end_time:
+            return None
+        return (self.end_time - self.start_time).total_seconds()
+
    def execute_async(
        self,
        agent: BaseAgent | None = None,
@@ -342,7 +355,7 @@ class Task(BaseModel):
                f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
            )

-        start_time = self._set_start_execution_time()
+        self.start_time = datetime.datetime.now()
        self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)

        self.prompt_context = context
@@ -392,15 +405,17 @@ class Task(BaseModel):

            if isinstance(guardrail_result.result, str):
                task_output.raw = guardrail_result.result
-                pydantic_output, json_output = self._export_output(guardrail_result.result)
+                pydantic_output, json_output = self._export_output(
+                    guardrail_result.result
+                )
                task_output.pydantic = pydantic_output
                task_output.json_dict = json_output
            elif isinstance(guardrail_result.result, TaskOutput):
                task_output = guardrail_result.result

        self.output = task_output
+        self.end_time = datetime.datetime.now()

-        self._set_end_execution_time(start_time)
        if self.callback:
            self.callback(self.output)

@@ -412,7 +427,9 @@ class Task(BaseModel):
            content = (
                json_output
                if json_output
-                else pydantic_output.model_dump_json() if pydantic_output else result
+                else pydantic_output.model_dump_json()
+                if pydantic_output
+                else result
            )
            self._save_file(content)

@@ -434,11 +451,11 @@ class Task(BaseModel):

    def interpolate_inputs(self, inputs: Dict[str, Union[str, int, float]]) -> None:
        """Interpolate inputs into the task description, expected output, and output file path.
-        
+
        Args:
            inputs: Dictionary mapping template variables to their values.
                   Supported value types are strings, integers, and floats.
-        
+
        Raises:
            ValueError: If a required template variable is missing from inputs.
        """
@@ -455,7 +472,9 @@ class Task(BaseModel):
        try:
            self.description = self._original_description.format(**inputs)
        except KeyError as e:
-            raise ValueError(f"Missing required template variable '{e.args[0]}' in description") from e
+            raise ValueError(
+                f"Missing required template variable '{e.args[0]}' in description"
+            ) from e
        except ValueError as e:
            raise ValueError(f"Error interpolating description: {str(e)}") from e

@@ -472,22 +491,26 @@ class Task(BaseModel):
                    input_string=self._original_output_file, inputs=inputs
                )
            except (KeyError, ValueError) as e:
-                raise ValueError(f"Error interpolating output_file path: {str(e)}") from e
+                raise ValueError(
+                    f"Error interpolating output_file path: {str(e)}"
+                ) from e

-    def interpolate_only(self, input_string: Optional[str], inputs: Dict[str, Union[str, int, float]]) -> str:
+    def interpolate_only(
+        self, input_string: Optional[str], inputs: Dict[str, Union[str, int, float]]
+    ) -> str:
        """Interpolate placeholders (e.g., {key}) in a string while leaving JSON untouched.
-        
+
        Args:
            input_string: The string containing template variables to interpolate.
                         Can be None or empty, in which case an empty string is returned.
            inputs: Dictionary mapping template variables to their values.
                   Supported value types are strings, integers, and floats.
                   If input_string is empty or has no placeholders, inputs can be empty.
-        
+
        Returns:
            The interpolated string with all template variables replaced with their values.
            Empty string if input_string is None or empty.
-            
+
        Raises:
            ValueError: If a required template variable is missing from inputs.
            KeyError: If a template variable is not found in the inputs dictionary.
@@ -497,13 +520,17 @@ class Task(BaseModel):
        if "{" not in input_string and "}" not in input_string:
            return input_string
        if not inputs:
-            raise ValueError("Inputs dictionary cannot be empty when interpolating variables")
+            raise ValueError(
+                "Inputs dictionary cannot be empty when interpolating variables"
+            )

        try:
            # Validate input types
            for key, value in inputs.items():
                if not isinstance(value, (str, int, float)):
-                    raise ValueError(f"Value for key '{key}' must be a string, integer, or float, got {type(value).__name__}")
+                    raise ValueError(
+                        f"Value for key '{key}' must be a string, integer, or float, got {type(value).__name__}"
+                    )

            escaped_string = input_string.replace("{", "{{").replace("}", "}}")

@@ -512,7 +539,9 @@ class Task(BaseModel):

            return escaped_string.format(**inputs)
        except KeyError as e:
-            raise KeyError(f"Template variable '{e.args[0]}' not found in inputs dictionary") from e
+            raise KeyError(
+                f"Template variable '{e.args[0]}' not found in inputs dictionary"
+            ) from e
        except ValueError as e:
            raise ValueError(f"Error during string interpolation: {str(e)}") from e

@@ -597,10 +626,10 @@ class Task(BaseModel):

    def _save_file(self, result: Any) -> None:
        """Save task output to a file.
-        
+
        Args:
            result: The result to save to the file. Can be a dict or any stringifiable object.
-            
+
        Raises:
            ValueError: If output_file is not set
            RuntimeError: If there is an error writing to the file
@@ -618,6 +647,7 @@ class Task(BaseModel):
            with resolved_path.open("w", encoding="utf-8") as file:
                if isinstance(result, dict):
                    import json
+
                    json.dump(result, file, ensure_ascii=False, indent=2)
                else:
                    file.write(str(result))
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -180,12 +180,12 @@ class CrewEvaluator:
            self._test_result_span = self._telemetry.individual_test_result_span(
                self.crew,
                evaluation_result.pydantic.quality,
-                current_task._execution_time,
+                current_task.execution_duration,
                self.openai_model_name,
            )
            self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
            self.run_execution_times[self.iteration].append(
-                current_task._execution_time
+                current_task.execution_duration
            )
        else:
            raise ValueError("Evaluation result is not in the expected format")
--- a/tests/cassettes/test_task_execution_times.yaml
+++ b/tests/cassettes/test_task_execution_times.yaml
@@ -0,0 +1,146 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Researcher. You''re
+      an expert researcher, specialized in technology, software engineering, AI and
+      startups. You work as a freelancer and is now working on doing research and
+      analysis for a new customer.\nYour personal goal is: Make the best research
+      and analysis on content about AI and AI agents\nTo give my best complete final
+      answer to the task use the exact following format:\n\nThought: I now can give
+      a great answer\nFinal Answer: Your final answer must be the great and the most
+      complete as possible, it must be outcome described.\n\nI MUST use these formats,
+      my job depends on it!"}, {"role": "user", "content": "\nCurrent Task: Give me
+      a list of 5 interesting ideas to explore for na article, what makes them unique
+      and interesting.\n\nThis is the expect criteria for your final answer: Bullet
+      point list of 5 interesting ideas.\nyou MUST return the actual complete content
+      as the final answer, not a summary.\n\nBegin! This is VERY important to you,
+      use the tools available and give your best Final Answer, your job depends on
+      it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"], "stream":
+      false}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '1177'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.52.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.52.1
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.7
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-AlfwrGToOoVtDhb3ryZMpA07aZy4m\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1735926029,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"I now can give a great answer  \\nFinal
+      Answer: \\n- **The Role of Emotional Intelligence in AI Agents**: Explore how
+      developing emotional intelligence in AI can change user interactions. Investigate
+      algorithms that enable AI agents to recognize and respond to human emotions,
+      enhancing user experience in sectors such as therapy, customer service, and
+      education. This idea is unique as it blends psychology with artificial intelligence,
+      presenting a new frontier for AI applications.\\n\\n- **AI Agents in Problem-Solving
+      for Climate Change**: Analyze how AI agents can contribute to developing innovative
+      solutions for climate change challenges. Focus on their role in predicting climate
+      patterns, optimizing energy consumption, and managing resources more efficiently.
+      This topic is unique because it highlights the practical impact of AI on one
+      of the most pressing global issues.\\n\\n- **The Ethics of Autonomous Decision-Making
+      AI**: Delve into the ethical implications surrounding AI agents that make autonomous
+      decisions, especially in critical areas like healthcare, transportation, and
+      law enforcement. This idea raises questions about accountability and bias, making
+      it a vital discussion point as AI continues to advance. The unique aspect lies
+      in the intersection of technology and moral philosophy.\\n\\n- **AI Agents Shaping
+      the Future of Remote Work**: Investigate how AI agents are transforming remote
+      work environments through automation, communication facilitation, and performance
+      monitoring. Discuss unique applications such as virtual assistants, project
+      management tools, and AI-driven team collaboration platforms. This topic is
+      particularly relevant as the workforce becomes increasingly remote, making it
+      an appealing area of exploration.\\n\\n- **Cultural Impacts of AI Agents in
+      Media and Entertainment**: Examine how AI-driven characters and narratives are
+      changing the media landscape, from video games to films and animations. Analyze
+      audience reception and the role of AI in personalizing content. This concept
+      is unique due to its intersection with digital culture and artistic expression,
+      offering insights into how technology influences social norms and preferences.\",\n
+      \       \"refusal\": null\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
+      \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 220,\n    \"completion_tokens\":
+      376,\n    \"total_tokens\": 596,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
+      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
+      \"fp_0aa8d3e20b\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8fc4c6324d42ad5a-POA
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 03 Jan 2025 17:40:34 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=zdRUS9YIvR7oCmJGeB7BOAnmxI7FOE5Jae5yRZDCnPE-1735926034-1.0.1.1-gvIEXrMfT69wL2mv4ApivWX67OOpDegjf1LE6g9u3GEDuQdLQok.vlLZD.SdGzK0bMug86JZhBeDZMleJlI2EQ;
+        path=/; expires=Fri, 03-Jan-25 18:10:34 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=CW_cKQGYWY3cL.S6Xo5z0cmkmWHy5Q50OA_KjPEijNk-1735926034530-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '5124'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999729'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_95ae59da1099e02c0d95bf25ba179fed
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1
--- a/tests/task_test.py
+++ b/tests/task_test.py
@@ -936,3 +936,29 @@ def test_output_file_validation():
            expected_output="Test output",
            output_file="{invalid-name}/output.txt",
        )
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_task_execution_times():
+    researcher = Agent(
+        role="Researcher",
+        goal="Make the best research and analysis on content about AI and AI agents",
+        backstory="You're an expert researcher, specialized in technology, software engineering, AI and startups. You work as a freelancer and is now working on doing research and analysis for a new customer.",
+        allow_delegation=False,
+    )
+
+    task = Task(
+        description="Give me a list of 5 interesting ideas to explore for na article, what makes them unique and interesting.",
+        expected_output="Bullet point list of 5 interesting ideas.",
+        agent=researcher,
+    )
+
+    assert task.start_time is None
+    assert task.end_time is None
+    assert task.execution_duration is None
+
+    task.execute_sync(agent=researcher)
+
+    assert task.start_time is not None
+    assert task.end_time is not None
+    assert task.execution_duration == (task.end_time - task.start_time).total_seconds()