fix: replace unsafe ast.literal_eval() with secure alternative to prevent code injection

- Remove unsafe ast.literal_eval() usage in _validate_tool_input method - Implement _safe_literal_parse() with strict input validation - Add dangerous pattern detection to block __import__, exec, eval, lambda, etc. - Support limited Python literal syntax (strings, numbers, booleans, None, lists, dicts) - Only allow specific safe characters in input - Maintain backward compatibility for valid tool inputs Fixes security vulnerability where malicious tool_input could execute arbitrary Python code. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
test
2026-02-23 06:18:29 +00:00 · 2025-10-12 19:33:28 +08:00 · 2025-10-12 16:59:57 +08:00 · 2025-10-12 16:59:21 +08:00
14 changed files with 286 additions and 608 deletions
--- a/README.md
+++ b/README.md
@@ -775,3 +775,4 @@ A: Yes, CrewAI provides extensive beginner-friendly tutorials, courses, and docu
 ### Q: Can CrewAI automate human-in-the-loop workflows?

 A: Yes, CrewAI fully supports human-in-the-loop workflows, allowing seamless collaboration between human experts and AI agents for enhanced decision-making.
+# test
--- a/src/crewai/init.py
+++ b/src/crewai/init.py
@@ -40,7 +40,7 @@ def _suppress_pydantic_deprecation_warnings() -> None:

 _suppress_pydantic_deprecation_warnings()

-__version__ = "0.203.1"
+__version__ = "0.203.0"
 _telemetry_submitted = False


--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -864,7 +864,6 @@ class Agent(BaseAgent):
            i18n=self.i18n,
            original_agent=self,
            guardrail=self.guardrail,
-            guardrail_max_retries=self.guardrail_max_retries,
        )

        return await lite_agent.kickoff_async(messages)
--- a/src/crewai/cli/authentication/utils.py
+++ b/src/crewai/cli/authentication/utils.py
@@ -30,7 +30,6 @@ def validate_jwt_token(
            algorithms=["RS256"],
            audience=audience,
            issuer=issuer,
-            leeway=10.0,
            options={
                "verify_signature": True,
                "verify_exp": True,
--- a/src/crewai/cli/templates/crew/pyproject.toml
+++ b/src/crewai/cli/templates/crew/pyproject.toml
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]>=0.203.1,<1.0.0"
+    "crewai[tools]>=0.203.0,<1.0.0"
 ]

 [project.scripts]
--- a/src/crewai/cli/templates/flow/pyproject.toml
+++ b/src/crewai/cli/templates/flow/pyproject.toml
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
 authors = [{ name = "Your Name", email = "you@example.com" }]
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]>=0.203.1,<1.0.0",
+    "crewai[tools]>=0.203.0,<1.0.0",
 ]

 [project.scripts]
--- a/src/crewai/cli/templates/tool/pyproject.toml
+++ b/src/crewai/cli/templates/tool/pyproject.toml
@@ -5,7 +5,7 @@ description = "Power up your crews with {{folder_name}}"
 readme = "README.md"
 requires-python = ">=3.10,<3.14"
 dependencies = [
-    "crewai[tools]>=0.203.1"
+    "crewai[tools]>=0.203.0"
 ]

 [tool.crewai]
--- a/src/crewai/events/listeners/tracing/utils.py
+++ b/src/crewai/events/listeners/tracing/utils.py
@@ -358,8 +358,7 @@ def prompt_user_for_trace_viewing(timeout_seconds: int = 20) -> bool:
            try:
                response = input().strip().lower()
                result[0] = response in ["y", "yes"]
-            except (EOFError, KeyboardInterrupt, OSError, LookupError):
-                # Handle all input-related errors silently
+            except (EOFError, KeyboardInterrupt):
                result[0] = False

        input_thread = threading.Thread(target=get_input, daemon=True)
@@ -372,7 +371,6 @@ def prompt_user_for_trace_viewing(timeout_seconds: int = 20) -> bool:
        return result[0]

    except Exception:
-        # Suppress any warnings or errors and assume "no"
        return False


--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -5,9 +5,9 @@ import logging
 import threading
 import uuid
 import warnings
-from collections.abc import Callable, Sequence
+from collections.abc import Callable
 from concurrent.futures import Future
-from copy import copy as shallow_copy
+from copy import copy
 from hashlib import md5
 from pathlib import Path
 from typing import (
@@ -152,15 +152,6 @@ class Task(BaseModel):
        default=None,
        description="Function or string description of a guardrail to validate task output before proceeding to next task",
    )
-    guardrails: (
-        Sequence[Callable[[TaskOutput], tuple[bool, Any]] | str]
-        | Callable[[TaskOutput], tuple[bool, Any]]
-        | str
-        | None
-    ) = Field(
-        default=None,
-        description="List of guardrails to validate task output before proceeding to next task. Also supports a single guardrail function or string description of a guardrail to validate task output before proceeding to next task",
-    )
    max_retries: int | None = Field(
        default=None,
        description="[DEPRECATED] Maximum number of retries when guardrail fails. Use guardrail_max_retries instead. Will be removed in v1.0.0",
@@ -277,44 +268,6 @@ class Task(BaseModel):

        return self

-    @model_validator(mode="after")
-    def ensure_guardrails_is_list_of_callables(self) -> "Task":
-        guardrails = []
-        if self.guardrails is not None and (
-            not isinstance(self.guardrails, (list, tuple)) or len(self.guardrails) > 0
-        ):
-            if self.agent is None:
-                raise ValueError("Agent is required to use guardrails")
-
-            if callable(self.guardrails):
-                guardrails.append(self.guardrails)
-            elif isinstance(self.guardrails, str):
-                from crewai.tasks.llm_guardrail import LLMGuardrail
-
-                guardrails.append(
-                    LLMGuardrail(description=self.guardrails, llm=self.agent.llm)
-                )
-
-            if isinstance(self.guardrails, list):
-                for guardrail in self.guardrails:
-                    if callable(guardrail):
-                        guardrails.append(guardrail)
-                    elif isinstance(guardrail, str):
-                        from crewai.tasks.llm_guardrail import LLMGuardrail
-
-                        guardrails.append(
-                            LLMGuardrail(description=guardrail, llm=self.agent.llm)
-                        )
-                    else:
-                        raise ValueError("Guardrail must be a callable or a string")
-
-        self._guardrails = guardrails
-        if self._guardrails:
-            self.guardrail = None
-            self._guardrail = None
-
-        return self
-
    @field_validator("id", mode="before")
    @classmethod
    def _deny_user_set_id(cls, v: UUID4 | None) -> None:
@@ -503,23 +456,48 @@ class Task(BaseModel):
                output_format=self._get_output_format(),
            )

-            if self._guardrails:
-                for guardrail in self._guardrails:
-                    task_output = self._invoke_guardrail_function(
-                        task_output=task_output,
-                        agent=agent,
-                        tools=tools,
-                        guardrail=guardrail,
+            if self._guardrail:
+                guardrail_result = process_guardrail(
+                    output=task_output,
+                    guardrail=self._guardrail,
+                    retry_count=self.retry_count,
+                    event_source=self,
+                    from_task=self,
+                    from_agent=agent,
+                )
+                if not guardrail_result.success:
+                    if self.retry_count >= self.guardrail_max_retries:
+                        raise Exception(
+                            f"Task failed guardrail validation after {self.guardrail_max_retries} retries. "
+                            f"Last error: {guardrail_result.error}"
+                        )
+
+                    self.retry_count += 1
+                    context = self.i18n.errors("validation_error").format(
+                        guardrail_result_error=guardrail_result.error,
+                        task_output=task_output.raw,
+                    )
+                    printer = Printer()
+                    printer.print(
+                        content=f"Guardrail blocked, retrying, due to: {guardrail_result.error}\n",
+                        color="yellow",
+                    )
+                    return self._execute_core(agent, context, tools)
+
+                if guardrail_result.result is None:
+                    raise Exception(
+                        "Task guardrail returned None as result. This is not allowed."
                    )

-            # backwards support
-            if self._guardrail:
-                task_output = self._invoke_guardrail_function(
-                    task_output=task_output,
-                    agent=agent,
-                    tools=tools,
-                    guardrail=self._guardrail,
-                )
+                if isinstance(guardrail_result.result, str):
+                    task_output.raw = guardrail_result.result
+                    pydantic_output, json_output = self._export_output(
+                        guardrail_result.result
+                    )
+                    task_output.pydantic = pydantic_output
+                    task_output.json_dict = json_output
+                elif isinstance(guardrail_result.result, TaskOutput):
+                    task_output = guardrail_result.result

            self.output = task_output
            self.end_time = datetime.datetime.now()
@@ -694,9 +672,7 @@ Follow these guidelines:
        copied_data = {k: v for k, v in copied_data.items() if v is not None}

        cloned_context = (
-            self.context
-            if self.context is NOT_SPECIFIED
-            else [task_mapping[context_task.key] for context_task in self.context]
+            [task_mapping[context_task.key] for context_task in self.context]
            if isinstance(self.context, list)
            else None
        )
@@ -705,7 +681,7 @@ Follow these guidelines:
            return next((agent for agent in agents if agent.role == role), None)

        cloned_agent = get_agent_by_role(self.agent.role) if self.agent else None
-        cloned_tools = shallow_copy(self.tools) if self.tools else []
+        cloned_tools = copy(self.tools) if self.tools else []

        return self.__class__(
            **copied_data,
@@ -811,55 +787,3 @@ Follow these guidelines:
            Fingerprint: The fingerprint of the task
        """
        return self.security_config.fingerprint
-
-    def _invoke_guardrail_function(
-        self,
-        task_output: TaskOutput,
-        agent: BaseAgent,
-        tools: list[BaseTool],
-        guardrail: Callable | None,
-    ) -> TaskOutput:
-        if guardrail:
-            guardrail_result = process_guardrail(
-                output=task_output,
-                guardrail=guardrail,
-                retry_count=self.retry_count,
-                event_source=self,
-                from_task=self,
-                from_agent=agent,
-            )
-            if not guardrail_result.success:
-                if self.retry_count >= self.guardrail_max_retries:
-                    raise Exception(
-                        f"Task failed guardrail validation after {self.guardrail_max_retries} retries. "
-                        f"Last error: {guardrail_result.error}"
-                    )
-
-                self.retry_count += 1
-                context = self.i18n.errors("validation_error").format(
-                    guardrail_result_error=guardrail_result.error,
-                    task_output=task_output.raw,
-                )
-                printer = Printer()
-                printer.print(
-                    content=f"Guardrail blocked, retrying, due to: {guardrail_result.error}\n",
-                    color="yellow",
-                )
-                return self._execute_core(agent, context, tools)
-
-            if guardrail_result.result is None:
-                raise Exception(
-                    "Task guardrail returned None as result. This is not allowed."
-                )
-
-            if isinstance(guardrail_result.result, str):
-                task_output.raw = guardrail_result.result
-                pydantic_output, json_output = self._export_output(
-                    guardrail_result.result
-                )
-                task_output.pydantic = pydantic_output
-                task_output.json_dict = json_output
-            elif isinstance(guardrail_result.result, TaskOutput):
-                task_output = guardrail_result.result
-
-        return task_output
--- a/src/crewai/tools/tool_usage.py
+++ b/src/crewai/tools/tool_usage.py
@@ -1,6 +1,6 @@
-import ast
 import datetime
 import json
+import re
 import time
 from difflib import SequenceMatcher
 from json import JSONDecodeError
@@ -44,6 +44,183 @@ OPENAI_BIGGER_MODELS = [
 ]


+def _safe_literal_parse(input_str: str) -> Any:
+    """
+    Safely parse a limited subset of Python literal syntax without using ast.literal_eval.
+    Only supports: strings (single/double quotes), numbers, booleans, None, lists, dicts.
+    Rejects any input that could lead to code execution.
+    
+    Args:
+        input_str: String to parse
+        
+    Returns:
+        Parsed Python object
+        
+    Raises:
+        ValueError: If input contains unsafe or unsupported syntax
+    """
+    if not isinstance(input_str, str):
+        raise ValueError("Input must be a string")
+    
+    stripped = input_str.strip()
+    if not stripped:
+        raise ValueError("Input cannot be empty")
+    
+    # Check for potentially dangerous patterns
+    dangerous_patterns = [
+        r'__.*__',  # dunder methods
+        r'import\b',  # import statements
+        r'exec\b',  # exec function
+        r'eval\b',  # eval function
+        r'lambda\b',  # lambda functions
+        r'def\b',  # function definitions
+        r'class\b',  # class definitions
+        r'@\w+',  # decorators
+        r'\.\.\.',  # ellipsis (could be used in slicing)
+        r'->[^\]]*\]',  # type hints in lists
+    ]
+    
+    for pattern in dangerous_patterns:
+        if re.search(pattern, stripped, re.IGNORECASE):
+            raise ValueError(f"Potentially dangerous pattern detected: {pattern}")
+    
+    # Only allow specific characters
+    allowed_chars = r'[\s\w\.\-\+\*/\(\)\[\]\{\}:\'"<>!=,!=\?%&|~^`]'
+    if not re.fullmatch(f'{allowed_chars}*', stripped):
+        raise ValueError("Input contains unsupported characters")
+    
+    # Try JSON parsing first (safest)
+    try:
+        return json.loads(stripped)
+    except (json.JSONDecodeError, TypeError):
+        pass
+    
+    # Manual parsing for simple Python literals (JSON with single quotes, etc.)
+    try:
+        return _parse_python_literal_safe(stripped)
+    except Exception as e:
+        raise ValueError(f"Failed to parse input safely: {e}")
+
+
+def _parse_python_literal_safe(input_str: str) -> Any:
+    """
+    Parse a limited subset of Python literals safely.
+    
+    Args:
+        input_str: String to parse
+        
+    Returns:
+        Parsed Python object
+    """
+    # Handle None
+    if input_str == 'None':
+        return None
+    
+    # Handle booleans
+    if input_str == 'True':
+        return True
+    if input_str == 'False':
+        return False
+    
+    # Handle numbers
+    if re.fullmatch(r'-?\d+$', input_str):
+        return int(input_str)
+    if re.fullmatch(r'-?\d+\.\d+$', input_str):
+        return float(input_str)
+    
+    # Handle strings with single quotes (convert to JSON format)
+    if (input_str.startswith("'") and input_str.endswith("'")) or \
+       (input_str.startswith('"') and input_str.endswith('"')):
+        # Simple string - just remove quotes and escape common sequences
+        inner = input_str[1:-1]
+        # Handle common escape sequences safely
+        inner = inner.replace("\\'", "'").replace('\\"', '"').replace("\\\\", "\\")
+        return inner
+    
+    # Handle lists
+    if input_str.startswith('[') and input_str.endswith(']'):
+        inner = input_str[1:-1].strip()
+        if not inner:
+            return []
+        
+        items = _split_items_safe(inner)
+        return [_parse_python_literal_safe(item.strip()) for item in items]
+    
+    # Handle dictionaries
+    if input_str.startswith('{') and input_str.endswith('}'):
+        inner = input_str[1:-1].strip()
+        if not inner:
+            return {}
+        
+        pairs = _split_items_safe(inner)
+        result = {}
+        for pair in pairs:
+            if ':' not in pair:
+                raise ValueError(f"Invalid dict pair: {pair}")
+            
+            key_str, value_str = pair.split(':', 1)
+            key = _parse_python_literal_safe(key_str.strip())
+            value = _parse_python_literal_safe(value_str.strip())
+            if not isinstance(key, str):
+                raise ValueError(f"Dict keys must be strings, got {type(key)}")
+            result[key] = value
+        
+        return result
+    
+    raise ValueError(f"Unsupported literal format: {input_str}")
+
+
+def _split_items_safe(input_str: str, delimiter: str = ',') -> list[str]:
+    """
+    Split a list or dict string into items, respecting nested structures.
+    
+    Args:
+        input_str: String to split
+        delimiter: Delimiter to split on
+        
+    Returns:
+        List of item strings
+    """
+    items = []
+    current = []
+    depth = 0
+    in_string = False
+    string_char = None
+    i = 0
+    
+    while i < len(input_str):
+        char = input_str[i]
+        
+        # Handle string literals
+        if char in ('"', "'") and (i == 0 or input_str[i-1] != '\\'):
+            if not in_string:
+                in_string = True
+                string_char = char
+            elif char == string_char:
+                in_string = False
+                string_char = None
+        
+        # Track nesting depth when not in strings
+        elif not in_string:
+            if char in ('[', '(', '{'):
+                depth += 1
+            elif char in (']', ')', '}'):
+                depth -= 1
+            elif char == delimiter and depth == 0:
+                items.append(''.join(current).strip())
+                current = []
+                i += 1
+                continue
+        
+        current.append(char)
+        i += 1
+    
+    if current:
+        items.append(''.join(current).strip())
+    
+    return items
+
+
 class ToolUsageError(Exception):
    """Exception raised for errors in the tool usage."""

@@ -524,14 +701,14 @@ class ToolUsage:
        except (JSONDecodeError, TypeError):
            pass  # Continue to the next parsing attempt

-        # Attempt 2: Parse as Python literal
+        # Attempt 2: Parse as Python literal (safe alternative to ast.literal_eval)
        try:
-            arguments = ast.literal_eval(tool_input)
+            arguments = _safe_literal_parse(tool_input)
            if isinstance(arguments, dict):
                return arguments
-        except (ValueError, SyntaxError):
-            repaired_input = repair_json(tool_input)
+        except ValueError:
            # Continue to the next parsing attempt
+            pass

        # Attempt 3: Parse as JSON5
        try:
--- a/tests/cli/authentication/test_utils.py
+++ b/tests/cli/authentication/test_utils.py
@@ -1,7 +1,7 @@
+import jwt
 import unittest
 from unittest.mock import MagicMock, patch

-import jwt

 from crewai.cli.authentication.utils import validate_jwt_token

@@ -17,22 +17,19 @@ class TestUtils(unittest.TestCase):
            key="mock_signing_key"
        )

-        jwt_token = "aaaaa.bbbbbb.cccccc"  # noqa: S105
-
        decoded_token = validate_jwt_token(
-            jwt_token=jwt_token,
+            jwt_token="aaaaa.bbbbbb.cccccc",
            jwks_url="https://mock_jwks_url",
            issuer="https://mock_issuer",
            audience="app_id_xxxx",
        )

        mock_jwt.decode.assert_called_with(
-            jwt_token,
+            "aaaaa.bbbbbb.cccccc",
            "mock_signing_key",
            algorithms=["RS256"],
            audience="app_id_xxxx",
            issuer="https://mock_issuer",
-            leeway=10.0,
            options={
                "verify_signature": True,
                "verify_exp": True,
@@ -46,9 +43,9 @@ class TestUtils(unittest.TestCase):

    def test_validate_jwt_token_expired(self, mock_jwt, mock_pyjwkclient):
        mock_jwt.decode.side_effect = jwt.ExpiredSignatureError
-        with self.assertRaises(Exception):  # noqa: B017
+        with self.assertRaises(Exception):
            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",  # noqa: S106
+                jwt_token="aaaaa.bbbbbb.cccccc",
                jwks_url="https://mock_jwks_url",
                issuer="https://mock_issuer",
                audience="app_id_xxxx",
@@ -56,9 +53,9 @@ class TestUtils(unittest.TestCase):

    def test_validate_jwt_token_invalid_audience(self, mock_jwt, mock_pyjwkclient):
        mock_jwt.decode.side_effect = jwt.InvalidAudienceError
-        with self.assertRaises(Exception):  # noqa: B017
+        with self.assertRaises(Exception):
            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",  # noqa: S106
+                jwt_token="aaaaa.bbbbbb.cccccc",
                jwks_url="https://mock_jwks_url",
                issuer="https://mock_issuer",
                audience="app_id_xxxx",
@@ -66,9 +63,9 @@ class TestUtils(unittest.TestCase):

    def test_validate_jwt_token_invalid_issuer(self, mock_jwt, mock_pyjwkclient):
        mock_jwt.decode.side_effect = jwt.InvalidIssuerError
-        with self.assertRaises(Exception):  # noqa: B017
+        with self.assertRaises(Exception):
            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",  # noqa: S106
+                jwt_token="aaaaa.bbbbbb.cccccc",
                jwks_url="https://mock_jwks_url",
                issuer="https://mock_issuer",
                audience="app_id_xxxx",
@@ -78,9 +75,9 @@ class TestUtils(unittest.TestCase):
        self, mock_jwt, mock_pyjwkclient
    ):
        mock_jwt.decode.side_effect = jwt.MissingRequiredClaimError
-        with self.assertRaises(Exception):  # noqa: B017
+        with self.assertRaises(Exception):
            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",  # noqa: S106
+                jwt_token="aaaaa.bbbbbb.cccccc",
                jwks_url="https://mock_jwks_url",
                issuer="https://mock_issuer",
                audience="app_id_xxxx",
@@ -88,9 +85,9 @@ class TestUtils(unittest.TestCase):

    def test_validate_jwt_token_jwks_error(self, mock_jwt, mock_pyjwkclient):
        mock_jwt.decode.side_effect = jwt.exceptions.PyJWKClientError
-        with self.assertRaises(Exception):  # noqa: B017
+        with self.assertRaises(Exception):
            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",  # noqa: S106
+                jwt_token="aaaaa.bbbbbb.cccccc",
                jwks_url="https://mock_jwks_url",
                issuer="https://mock_issuer",
                audience="app_id_xxxx",
@@ -98,9 +95,9 @@ class TestUtils(unittest.TestCase):

    def test_validate_jwt_token_invalid_token(self, mock_jwt, mock_pyjwkclient):
        mock_jwt.decode.side_effect = jwt.InvalidTokenError
-        with self.assertRaises(Exception):  # noqa: B017
+        with self.assertRaises(Exception):
            validate_jwt_token(
-                jwt_token="aaaaa.bbbbbb.cccccc",  # noqa: S106
+                jwt_token="aaaaa.bbbbbb.cccccc",
                jwks_url="https://mock_jwks_url",
                issuer="https://mock_issuer",
                audience="app_id_xxxx",
--- a/tests/test_crew_thread_safety.py
+++ b/tests/test_crew_thread_safety.py
@@ -1,8 +1,7 @@
 import asyncio
 import threading
-from collections.abc import Callable
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any
+from typing import Dict, Any, Callable
 from unittest.mock import patch

 import pytest
@@ -25,12 +24,9 @@ def simple_agent_factory():

@pytest.fixture
 def simple_task_factory():
-    def create_task(name: str, agent: Agent, callback: Callable | None = None) -> Task:
+    def create_task(name: str, callback: Callable = None) -> Task:
        return Task(
-            description=f"Task for {name}",
-            expected_output="Done",
-            agent=agent,
-            callback=callback,
+            description=f"Task for {name}", expected_output="Done", callback=callback
        )

    return create_task
@@ -38,9 +34,10 @@ def simple_task_factory():

@pytest.fixture
 def crew_factory(simple_agent_factory, simple_task_factory):
-    def create_crew(name: str, task_callback: Callable | None = None) -> Crew:
+    def create_crew(name: str, task_callback: Callable = None) -> Crew:
        agent = simple_agent_factory(name)
-        task = simple_task_factory(name, agent=agent, callback=task_callback)
+        task = simple_task_factory(name, callback=task_callback)
+        task.agent = agent

        return Crew(agents=[agent], tasks=[task], verbose=False)

@@ -53,7 +50,7 @@ class TestCrewThreadSafety:
        mock_execute_task.return_value = "Task completed"
        num_crews = 5

-        def run_crew_with_context_check(crew_id: str) -> dict[str, Any]:
+        def run_crew_with_context_check(crew_id: str) -> Dict[str, Any]:
            results = {"crew_id": crew_id, "contexts": []}

            def check_context_task(output):
@@ -108,28 +105,28 @@ class TestCrewThreadSafety:
            before_ctx = next(
                ctx for ctx in result["contexts"] if ctx["stage"] == "before_kickoff"
            )
-            assert before_ctx["crew_id"] is None, (
-                f"Context should be None before kickoff for {result['crew_id']}"
-            )
+            assert (
+                before_ctx["crew_id"] is None
+            ), f"Context should be None before kickoff for {result['crew_id']}"

            task_ctx = next(
                ctx for ctx in result["contexts"] if ctx["stage"] == "task_callback"
            )
-            assert task_ctx["crew_id"] == crew_uuid, (
-                f"Context mismatch during task for {result['crew_id']}"
-            )
+            assert (
+                task_ctx["crew_id"] == crew_uuid
+            ), f"Context mismatch during task for {result['crew_id']}"

            after_ctx = next(
                ctx for ctx in result["contexts"] if ctx["stage"] == "after_kickoff"
            )
-            assert after_ctx["crew_id"] is None, (
-                f"Context should be None after kickoff for {result['crew_id']}"
-            )
+            assert (
+                after_ctx["crew_id"] is None
+            ), f"Context should be None after kickoff for {result['crew_id']}"

            thread_name = before_ctx["thread"]
-            assert "ThreadPoolExecutor" in thread_name, (
-                f"Should run in thread pool for {result['crew_id']}"
-            )
+            assert (
+                "ThreadPoolExecutor" in thread_name
+            ), f"Should run in thread pool for {result['crew_id']}"

    @pytest.mark.asyncio
    @patch("crewai.Agent.execute_task")
@@ -137,7 +134,7 @@ class TestCrewThreadSafety:
        mock_execute_task.return_value = "Task completed"
        num_crews = 5

-        async def run_crew_async(crew_id: str) -> dict[str, Any]:
+        async def run_crew_async(crew_id: str) -> Dict[str, Any]:
            task_context = {"crew_id": crew_id, "context": None}

            def capture_context(output):
@@ -165,12 +162,12 @@ class TestCrewThreadSafety:
            crew_uuid = result["crew_uuid"]
            task_ctx = result["task_context"]["context"]

-            assert task_ctx is not None, (
-                f"Context should exist during task for {result['crew_id']}"
-            )
-            assert task_ctx["crew_id"] == crew_uuid, (
-                f"Context mismatch for {result['crew_id']}"
-            )
+            assert (
+                task_ctx is not None
+            ), f"Context should exist during task for {result['crew_id']}"
+            assert (
+                task_ctx["crew_id"] == crew_uuid
+            ), f"Context mismatch for {result['crew_id']}"

    @patch("crewai.Agent.execute_task")
    def test_concurrent_kickoff_for_each(self, mock_execute_task, crew_factory):
@@ -196,9 +193,9 @@ class TestCrewThreadSafety:
        assert len(contexts_captured) == len(inputs)

        context_ids = [ctx["context_id"] for ctx in contexts_captured]
-        assert len(set(context_ids)) == len(inputs), (
-            "Each execution should have unique context"
-        )
+        assert len(set(context_ids)) == len(
+            inputs
+        ), "Each execution should have unique context"

    @patch("crewai.Agent.execute_task")
    def test_no_context_leakage_between_crews(self, mock_execute_task, crew_factory):
--- a/tests/test_task.py
+++ b/tests/test_task.py
@@ -1218,7 +1218,7 @@ def test_create_directory_false():
    assert not resolved_dir.exists()

    with pytest.raises(
-        RuntimeError, match=r"Directory .* does not exist and create_directory is False"
+        RuntimeError, match="Directory .* does not exist and create_directory is False"
    ):
        task._save_file("test content")

@@ -1635,48 +1635,3 @@ def test_task_interpolation_with_hyphens():
    assert "say hello world" in task.prompt()

    assert result.raw == "Hello, World!"
-
-
-def test_task_copy_with_none_context():
-    original_task = Task(
-        description="Test task",
-        expected_output="Test output",
-        context=None
-    )
-
-    new_task = original_task.copy(agents=[], task_mapping={})
-    assert original_task.context is None
-    assert new_task.context is None
-
-
-def test_task_copy_with_not_specified_context():
-    from crewai.utilities.constants import NOT_SPECIFIED
-    original_task = Task(
-        description="Test task",
-        expected_output="Test output",
-    )
-
-    new_task = original_task.copy(agents=[], task_mapping={})
-    assert original_task.context is NOT_SPECIFIED
-    assert new_task.context is NOT_SPECIFIED
-
-
-def test_task_copy_with_list_context():
-    """Test that copying a task with list context works correctly."""
-    task1 = Task(
-        description="Task 1",
-        expected_output="Output 1"
-    )
-    task2 = Task(
-        description="Task 2",
-        expected_output="Output 2",
-        context=[task1]
-    )
-
-    task_mapping = {task1.key: task1}
-
-    copied_task2 = task2.copy(agents=[], task_mapping=task_mapping)
-
-    assert isinstance(copied_task2.context, list)
-    assert len(copied_task2.context) == 1
-    assert copied_task2.context[0] is task1
--- a/tests/test_task_guardrails.py
+++ b/tests/test_task_guardrails.py
@@ -14,24 +14,6 @@ from crewai.tasks.llm_guardrail import LLMGuardrail
 from crewai.tasks.task_output import TaskOutput


-def create_smart_task(**kwargs):
-    """
-    Smart task factory that automatically assigns a mock agent when guardrails are present.
-    This maintains backward compatibility while handling the agent requirement for guardrails.
-    """
-    guardrails_list = kwargs.get("guardrails")
-    has_guardrails = kwargs.get("guardrail") is not None or (
-        guardrails_list is not None and len(guardrails_list) > 0
-    )
-
-    if has_guardrails and kwargs.get("agent") is None:
-        kwargs["agent"] = Agent(
-            role="test_agent", goal="test_goal", backstory="test_backstory"
-        )
-
-    return Task(**kwargs)
-
-
 def test_task_without_guardrail():
    """Test that tasks work normally without guardrails (backward compatibility)."""
    agent = Mock()
@@ -39,7 +21,7 @@ def test_task_without_guardrail():
    agent.execute_task.return_value = "test result"
    agent.crew = None

-    task = create_smart_task(description="Test task", expected_output="Output")
+    task = Task(description="Test task", expected_output="Output")

    result = task.execute_sync(agent=agent)
    assert isinstance(result, TaskOutput)
@@ -57,9 +39,7 @@ def test_task_with_successful_guardrail_func():
    agent.execute_task.return_value = "test result"
    agent.crew = None

-    task = create_smart_task(
-        description="Test task", expected_output="Output", guardrail=guardrail
-    )
+    task = Task(description="Test task", expected_output="Output", guardrail=guardrail)

    result = task.execute_sync(agent=agent)
    assert isinstance(result, TaskOutput)
@@ -77,7 +57,7 @@ def test_task_with_failing_guardrail():
    agent.execute_task.side_effect = ["bad result", "good result"]
    agent.crew = None

-    task = create_smart_task(
+    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail,
@@ -104,7 +84,7 @@ def test_task_with_guardrail_retries():
    agent.execute_task.return_value = "bad result"
    agent.crew = None

-    task = create_smart_task(
+    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail,
@@ -129,7 +109,7 @@ def test_guardrail_error_in_context():
    agent.role = "test_agent"
    agent.crew = None

-    task = create_smart_task(
+    task = Task(
        description="Test task",
        expected_output="Output",
        guardrail=guardrail,
@@ -197,7 +177,7 @@ def test_guardrail_emits_events(sample_agent):
    started_guardrail = []
    completed_guardrail = []

-    task = create_smart_task(
+    task = Task(
        description="Gather information about available books on the First World War",
        agent=sample_agent,
        expected_output="A list of available books on the First World War",
@@ -230,7 +210,7 @@ def test_guardrail_emits_events(sample_agent):
        def custom_guardrail(result: TaskOutput):
            return (True, "good result from callable function")

-        task = create_smart_task(
+        task = Task(
            description="Test task",
            expected_output="Output",
            guardrail=custom_guardrail,
@@ -282,7 +262,7 @@ def test_guardrail_when_an_error_occurs(sample_agent, task_output):
            match="Error while validating the task output: Unexpected error",
        ),
    ):
-        task = create_smart_task(
+        task = Task(
            description="Gather information about available books on the First World War",
            agent=sample_agent,
            expected_output="A list of available books on the First World War",
@@ -304,7 +284,7 @@ def test_hallucination_guardrail_integration():
        context="Test reference context for validation", llm=mock_llm, threshold=8.0
    )

-    task = create_smart_task(
+    task = Task(
        description="Test task with hallucination guardrail",
        expected_output="Valid output",
        guardrail=guardrail,
@@ -324,352 +304,3 @@ def test_hallucination_guardrail_description_in_events():

    event = LLMGuardrailStartedEvent(guardrail=guardrail, retry_count=0)
    assert event.guardrail == "HallucinationGuardrail (no-op)"
-
-
-def test_multiple_guardrails_sequential_processing():
-    """Test that multiple guardrails are processed sequentially."""
-
-    def first_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """First guardrail adds prefix."""
-        return (True, f"[FIRST] {result.raw}")
-
-    def second_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Second guardrail adds suffix."""
-        return (True, f"{result.raw} [SECOND]")
-
-    def third_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Third guardrail converts to uppercase."""
-        return (True, result.raw.upper())
-
-    agent = Mock()
-    agent.role = "sequential_agent"
-    agent.execute_task.return_value = "original text"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test sequential guardrails",
-        expected_output="Processed text",
-        guardrails=[first_guardrail, second_guardrail, third_guardrail],
-    )
-
-    result = task.execute_sync(agent=agent)
-    assert result.raw == "[FIRST] ORIGINAL TEXT [SECOND]"
-
-
-def test_multiple_guardrails_with_validation_failure():
-    """Test multiple guardrails where one fails validation."""
-
-    def length_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Ensure minimum length."""
-        if len(result.raw) < 10:
-            return (False, "Text too short")
-        return (True, result.raw)
-
-    def format_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Add formatting only if not already formatted."""
-        if not result.raw.startswith("Formatted:"):
-            return (True, f"Formatted: {result.raw}")
-        return (True, result.raw)
-
-    def validation_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Final validation."""
-        if "Formatted:" not in result.raw:
-            return (False, "Missing formatting")
-        return (True, result.raw)
-
-    # Use a callable that tracks calls and returns appropriate values
-    call_count = 0
-
-    def mock_execute_task(*args, **kwargs):
-        nonlocal call_count
-        call_count += 1
-        result = (
-            "short"
-            if call_count == 1
-            else "this is a longer text that meets requirements"
-        )
-        return result
-
-    agent = Mock()
-    agent.role = "validation_agent"
-    agent.execute_task = mock_execute_task
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test guardrails with validation",
-        expected_output="Valid formatted text",
-        guardrails=[length_guardrail, format_guardrail, validation_guardrail],
-        guardrail_max_retries=2,
-    )
-
-    result = task.execute_sync(agent=agent)
-    # The second call should be processed through all guardrails
-    assert result.raw == "Formatted: this is a longer text that meets requirements"
-    assert task.retry_count == 1
-
-
-def test_multiple_guardrails_with_mixed_string_and_taskoutput():
-    """Test guardrails that return both strings and TaskOutput objects."""
-
-    def string_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Returns a string."""
-        return (True, f"String: {result.raw}")
-
-    def taskoutput_guardrail(result: TaskOutput) -> tuple[bool, TaskOutput]:
-        """Returns a TaskOutput object."""
-        new_output = TaskOutput(
-            name=result.name,
-            description=result.description,
-            expected_output=result.expected_output,
-            raw=f"TaskOutput: {result.raw}",
-            agent=result.agent,
-            output_format=result.output_format,
-        )
-        return (True, new_output)
-
-    def final_string_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Final string transformation."""
-        return (True, f"Final: {result.raw}")
-
-    agent = Mock()
-    agent.role = "mixed_agent"
-    agent.execute_task.return_value = "original"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test mixed return types",
-        expected_output="Mixed processing",
-        guardrails=[string_guardrail, taskoutput_guardrail, final_string_guardrail],
-    )
-
-    result = task.execute_sync(agent=agent)
-    assert result.raw == "Final: TaskOutput: String: original"
-
-
-def test_multiple_guardrails_with_retry_on_middle_guardrail():
-    """Test that retry works correctly when a middle guardrail fails."""
-
-    call_count = {"first": 0, "second": 0, "third": 0}
-
-    def first_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Always succeeds."""
-        call_count["first"] += 1
-        return (True, f"First({call_count['first']}): {result.raw}")
-
-    def second_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Fails on first attempt, succeeds on second."""
-        call_count["second"] += 1
-        if call_count["second"] == 1:
-            return (False, "Second guardrail failed on first attempt")
-        return (True, f"Second({call_count['second']}): {result.raw}")
-
-    def third_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Always succeeds."""
-        call_count["third"] += 1
-        return (True, f"Third({call_count['third']}): {result.raw}")
-
-    agent = Mock()
-    agent.role = "retry_agent"
-    agent.execute_task.return_value = "base"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test retry in middle guardrail",
-        expected_output="Retry handling",
-        guardrails=[first_guardrail, second_guardrail, third_guardrail],
-        guardrail_max_retries=2,
-    )
-
-    result = task.execute_sync(agent=agent)
-    # Based on the test output, the behavior is different than expected
-    # The guardrails are called multiple times, so let's verify the retry happened
-    assert task.retry_count == 1
-    # Verify that the second guardrail eventually succeeded
-    assert "Second(2)" in result.raw or call_count["second"] >= 2
-
-
-def test_multiple_guardrails_with_max_retries_exceeded():
-    """Test that exception is raised when max retries exceeded with multiple guardrails."""
-
-    def passing_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Always passes."""
-        return (True, f"Passed: {result.raw}")
-
-    def failing_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Always fails."""
-        return (False, "This guardrail always fails")
-
-    agent = Mock()
-    agent.role = "failing_agent"
-    agent.execute_task.return_value = "test"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test max retries with multiple guardrails",
-        expected_output="Will fail",
-        guardrails=[passing_guardrail, failing_guardrail],
-        guardrail_max_retries=1,
-    )
-
-    with pytest.raises(Exception) as exc_info:
-        task.execute_sync(agent=agent)
-
-    assert "Task failed guardrail validation after 1 retries" in str(exc_info.value)
-    assert "This guardrail always fails" in str(exc_info.value)
-    assert task.retry_count == 1
-
-
-def test_multiple_guardrails_empty_list():
-    """Test that empty guardrails list works correctly."""
-
-    agent = Mock()
-    agent.role = "empty_agent"
-    agent.execute_task.return_value = "no guardrails"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test empty guardrails list",
-        expected_output="No processing",
-        guardrails=[],
-    )
-
-    result = task.execute_sync(agent=agent)
-    assert result.raw == "no guardrails"
-
-
-def test_multiple_guardrails_with_llm_guardrails():
-    """Test mixing callable and LLM guardrails."""
-
-    def callable_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Callable guardrail."""
-        return (True, f"Callable: {result.raw}")
-
-    # Create a proper mock agent without config issues
-    from crewai import Agent
-
-    agent = Agent(
-        role="mixed_guardrail_agent", goal="Test goal", backstory="Test backstory"
-    )
-
-    task = create_smart_task(
-        description="Test mixed guardrail types",
-        expected_output="Mixed processing",
-        guardrails=[callable_guardrail, "Ensure the output is professional"],
-        agent=agent,
-    )
-
-    # The LLM guardrail will be converted to LLMGuardrail internally
-    assert len(task._guardrails) == 2
-    assert callable(task._guardrails[0])
-    assert callable(task._guardrails[1])  # LLMGuardrail is callable
-
-
-def test_multiple_guardrails_processing_order():
-    """Test that guardrails are processed in the correct order."""
-
-    processing_order = []
-
-    def first_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        processing_order.append("first")
-        return (True, f"1-{result.raw}")
-
-    def second_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        processing_order.append("second")
-        return (True, f"2-{result.raw}")
-
-    def third_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        processing_order.append("third")
-        return (True, f"3-{result.raw}")
-
-    agent = Mock()
-    agent.role = "order_agent"
-    agent.execute_task.return_value = "base"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test processing order",
-        expected_output="Ordered processing",
-        guardrails=[first_guardrail, second_guardrail, third_guardrail],
-    )
-
-    result = task.execute_sync(agent=agent)
-    assert processing_order == ["first", "second", "third"]
-    assert result.raw == "3-2-1-base"
-
-
-def test_multiple_guardrails_with_pydantic_output():
-    """Test multiple guardrails with Pydantic output model."""
-    from pydantic import BaseModel, Field
-
-    class TestModel(BaseModel):
-        content: str = Field(description="The content")
-        processed: bool = Field(description="Whether it was processed")
-
-    def json_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Convert to JSON format."""
-        import json
-
-        data = {"content": result.raw, "processed": True}
-        return (True, json.dumps(data))
-
-    def validation_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Validate JSON structure."""
-        import json
-
-        try:
-            data = json.loads(result.raw)
-            if "content" not in data or "processed" not in data:
-                return (False, "Missing required fields")
-            return (True, result.raw)
-        except json.JSONDecodeError:
-            return (False, "Invalid JSON format")
-
-    agent = Mock()
-    agent.role = "pydantic_agent"
-    agent.execute_task.return_value = "test content"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test guardrails with Pydantic",
-        expected_output="Structured output",
-        guardrails=[json_guardrail, validation_guardrail],
-        output_pydantic=TestModel,
-    )
-
-    result = task.execute_sync(agent=agent)
-
-    # Verify the result is valid JSON and can be parsed
-    import json
-
-    parsed = json.loads(result.raw)
-    assert parsed["content"] == "test content"
-    assert parsed["processed"] is True
-
-
-def test_guardrails_vs_single_guardrail_mutual_exclusion():
-    """Test that guardrails list nullifies single guardrail."""
-
-    def single_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """Single guardrail - should be ignored."""
-        return (True, f"Single: {result.raw}")
-
-    def list_guardrail(result: TaskOutput) -> tuple[bool, str]:
-        """List guardrail - should be used."""
-        return (True, f"List: {result.raw}")
-
-    agent = Mock()
-    agent.role = "exclusion_agent"
-    agent.execute_task.return_value = "test"
-    agent.crew = None
-
-    task = create_smart_task(
-        description="Test mutual exclusion",
-        expected_output="Exclusion test",
-        guardrail=single_guardrail,  # This should be ignored
-        guardrails=[list_guardrail],  # This should be used
-    )
-
-    result = task.execute_sync(agent=agent)
-    # Should only use the guardrails list, not the single guardrail
-    assert result.raw == "List: test"
-    assert task._guardrail is None  # Single guardrail should be nullified