refactor: simplify TaskGuardrail to use LLM for validation, no code generation

Lucas Gomide
2025-04-29 09:50:03 -03:00
parent e3ab80f517
commit e940ff3cbd
14 changed files with 3883 additions and 4217 deletions

View File

@@ -324,11 +324,7 @@ blog_task = Task(
### TaskGuardrail
The `TaskGuardrail` class provides a sophisticated way to generate and execute validation code for task outputs. Here's how it works:
#### Code Execution
The generated code can be executed in two ways: Docker container (Default, Recommended) or current environment (unsafe mode)
The `TaskGuardrail` class offers a robust mechanism for validating task outputs
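A minimal usage sketch, based on the tests added in this commit (the import paths and the `gpt-4o-mini` model name are taken from those tests and may differ in your setup):

```python
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_guardrail import TaskGuardrail

# A guardrail can be declared as a plain string on the Task...
task = Task(
    description="Gather information about available books on the First World War",
    expected_output="A list of available books on the First World War",
    guardrail="Ensure the authors are from Italy",
)

# ...or assigned as an explicit TaskGuardrail carrying its own LLM.
task.guardrail = TaskGuardrail(
    description="Ensure the authors are from Italy",
    llm=LLM(model="gpt-4o-mini"),
)
```

When the task runs, the guardrail is called with the task's `TaskOutput` and returns a `(bool, Any)` tuple: the raw output on success, or the LLM's feedback on failure (failed attempts are retried, as reflected by `retry_count` in the guardrail events).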
### Error Handling Best Practices
@@ -791,7 +787,7 @@ task = Task(
)
```
##### Using YAML
#### Using YAML
```yaml
research_task:

View File

@@ -1,39 +1,40 @@
from typing import Any, Tuple
from typing import Any, Optional, Tuple
from pydantic import BaseModel, Field
from crewai.agent import Agent, LiteAgentOutput
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.printer import Printer
class TaskGuardrailResult(BaseModel):
valid: bool = Field(
description="Whether the task output complies with the guardrail"
)
feedback: str | None = Field(
description="A feedback about the task output if it is not valid",
default=None,
)
class TaskGuardrail:
"""A task that validates the output of another task using generated Python code.
This class generates and executes Python code to validate task outputs based on
specified criteria. It uses an LLM to generate the validation code and provides
safety guardrails for code execution.
The code is executed in a Docker container if available, otherwise it is executed in a sandboxed environment.
If unsafe mode is enabled, the code is executed in the current environment.
This class is used to validate the output of a Task based on specified criteria.
It uses an LLM to validate the output and provides feedback if the output is not valid.
Args:
description (str): The description of the validation criteria.
task (Task, optional): The task whose output needs validation.
llm (LLM, optional): The language model to use for code generation.
additional_instructions (str, optional): Additional instructions for the guardrail task.
unsafe_mode (bool, optional): Whether to run the code in unsafe mode.
Raises:
ValueError: If no valid LLM is provided.
"""
generated_code: str = ""
def __init__(
self,
description: str,
task: Task | None = None,
llm: LLM | None = None,
additional_instructions: str = "",
unsafe_mode: bool = False,
):
self.description = description
@@ -47,84 +48,36 @@ class TaskGuardrail:
)
self.llm: LLM | None = llm or fallback_llm
self.additional_instructions = additional_instructions
self.unsafe_mode = unsafe_mode
def _validate_output(self, task_output: TaskOutput) -> LiteAgentOutput:
agent = Agent(
role="Guardrail Agent",
goal="Validate the output of the task",
backstory="You are a expert at validating the output of a task. By providing effective feedback if the output is not valid.",
llm=self.llm,
)
@property
def system_instructions(self) -> str:
"""System instructions for the LLM code generation.
query = f"""
Ensure the following task result complies with the given guardrail.
Returns:
str: Complete system instructions including security constraints.
Task result:
{task_output.raw}
Guardrail:
{self.description}
Your task:
- Confirm if the Task result complies with the guardrail.
- If not, provide clear feedback explaining what is wrong (e.g., by how much it violates the rule, or what specific part fails).
- Focus only on identifying issues — do not propose corrections.
- If the Task result complies with the guardrail, saying that is valid
"""
security_instructions = (
"- DO NOT wrap the output in markdown or use triple backticks. Return only raw Python code."
"- DO NOT use `exec`, `eval`, `compile`, `open`, `os`, `subprocess`, `socket`, `shutil`, or any other system-level modules.\n"
"- Your code must not perform any file I/O, shell access, or dynamic code execution."
)
return (
"You are a expert Python developer"
"You **must strictly** follow the task description, use the provided raw output as the input in your code. "
"Your code must:\n"
"- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assined to 'result' variable.\n"
"- Use the literal string of the task output (already included in your input) if needed.\n"
"- Generate the code **following strictly** the task description.\n"
"- Be valid Python 3 — executable as-is.\n"
f"{security_instructions}\n"
"Additional instructions (do not override the previous instructions):\n"
f"{self.additional_instructions}"
)
def user_instructions(self, task_output: TaskOutput) -> str:
"""Generates user instructions for the LLM code generation.
result = agent.kickoff(query, response_format=TaskGuardrailResult)
Args:
task_output (TaskOutput): The output to be validated.
Returns:
str: Instructions for generating validation code.
"""
return (
"Based on the task description below, generate Python 3 code that validates the task output. \n"
"Task description:\n"
f"{self.description}\n"
"Here is the raw output from the task: \n"
f"'{task_output.raw}' \n"
"Use this exact string literal inside your generated code (do not reference variables like task_output.raw)."
"Now generate Python code that follows the instructions above."
)
def generate_code(self, task_output: TaskOutput) -> str:
"""Generates Python code for validating the task output.
Args:
task_output (TaskOutput): The output to be validated.
"""
if self.llm is None:
raise ValueError("Provide a valid LLM to the TaskGuardrail")
response = self.llm.call(
messages=[
{
"role": "system",
"content": self.system_instructions,
},
{
"role": "user",
"content": self.user_instructions(task_output=task_output),
},
]
)
printer = Printer()
printer.print(
content=f"The following code was generated for the guardrail task:\n{response}\n",
color="cyan",
)
return response
return result
def __call__(self, task_output: TaskOutput) -> Tuple[bool, Any]:
"""Executes the validation code on the task output.
"""Validates the output of a task based on specified criteria.
Args:
task_output (TaskOutput): The output to be validated.
@@ -134,28 +87,16 @@ class TaskGuardrail:
- bool: True if validation passed, False otherwise
- Any: The validation result or error message
"""
import ast
from crewai_tools import CodeInterpreterTool
try:
result = self._validate_output(task_output)
assert isinstance(
result.pydantic, TaskGuardrailResult
), "The guardrail result is not a valid pydantic model"
self.generated_code = self.generate_code(task_output)
result = CodeInterpreterTool(
code=self.generated_code, unsafe_mode=self.unsafe_mode
).run()
error_messages = [
"Something went wrong while running the code",
"No result variable found", # when running in unsafe mode, the final output should be stored in the result variable
]
if any(msg in result for msg in error_messages):
return False, result
if isinstance(result, str):
try:
result = ast.literal_eval(result)
except Exception as e:
return False, f"Error parsing result: {str(e)}"
return result
if result.pydantic.valid:
return True, task_output.raw
else:
return False, result.pydantic.feedback
except Exception as e:
return False, f"Error while validating the task output: {str(e)}"

View File

@@ -7,8 +7,8 @@ class TaskGuardrailStartedEvent(BaseEvent):
"""Event emitted when a guardrail task starts
Attributes:
messages: Content can be either a string or a list of dictionaries that support
multimodal content (text, images, etc.)
guardrail: The guardrail callable or TaskGuardrail instance
retry_count: The number of times the guardrail has been retried
"""
type: str = "task_guardrail_started"
@@ -23,8 +23,7 @@ class TaskGuardrailStartedEvent(BaseEvent):
super().__init__(**data)
if isinstance(self.guardrail, TaskGuardrail):
assert self.guardrail.generated_code is not None
self.guardrail = self.guardrail.generated_code.strip()
self.guardrail = self.guardrail.description.strip()
elif isinstance(self.guardrail, Callable):
self.guardrail = getsource(self.guardrail).strip()

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,522 +0,0 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nBegin! This is
VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '807'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFTBbuQ2DL3PVxA6e4KJm+zszq0ttkBORYuiPbSLASPRNndlSRHpmcwu
8u+FZCcz2ebQi2H78ZGPj6K+rQAMO7MDYwdUOya//ulX/a3tf/+Z3f3jX/xxdGGDfz60p7uvx+mj
aQoj3n8mq8+sKxvH5Ek5hhm2mVCpZL3e3ty22/bdpq3AGB35QuuTrm/ieuTA63bT3qw32/X1+4U9
RLYkZgd/rwAAvtVn0RkcPZodbJrnPyOJYE9m9xIEYHL05Y9BERbFoKY5gzYGpVCl30GIR7AYoOcD
AUJfZAMGOVIG+Cf8wgE9/Fi/d/DHQKAoXwAVBgwOMj1MnEkAoRiQaaAgNVNw4EiRPZUoSTEIgQ6o
MBKpgA4E9JjIKjmwmZUyI3QxQ5w0TXoFdwG6yXfsPYcedGB5LjdS0AZYgQVIhIIyetAINh4oA3oP
mTwdMCiglBpSBaUcD+wIllmVl+pEAxRkynMZ1KqNQxfziGWgpYz1hLkpBMtCTU2H1k4ZlfwJ0Llc
lMx9BSInELv6Ufy6Ks5l6mKmBu7gyN6XNj0HWgi2TDGfqrQYKKg0gK6QSl8YTtBP7Kgw5LkPN8uo
2umsvKsjm10s0nWIOU79UIOP5P06ZSqmlQRH1iFOCnFk1WIAjynmcmKW8QnEDDKNI2b+Ojv0ypzS
GgtgSjmiHebe+gkzBqVyogbuh/XDhJ719N1JEFSWjhfXFkPccixqdrm6PLiZukmwLE+YvL8AMIS4
EMrKfFqQp5cl8bFPOd7Ld1TTcWAZ9plQYigLIRqTqejTCuBTXcbp1X6ZlOOYdK/xC9Vy17c/zPnM
+Q64QK8/LKhGRX8G2m3bvJFwv9h+sc/Goh3Inann5cfJcbwAVhdt/1fOW7nn1jn0/yf9GbCWkpLb
p0yO7euWz2GZPtcFfzvsxeYq2AjlA1vaK1Muo3DU4eTnm8vISZTGfcehp5wyz9dXl/bd1t2+w/fY
WrN6Wv0LAAD//wMAAfXtOswFAAA=
headers:
CF-RAY:
- 934022059c2c0110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:56:45 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
path=/; expires=Mon, 21-Apr-25 22:26:45 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '2377'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999832'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_f39581c88a83855cf77c06098b787948
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nThis is the context
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
Previous result:\nThe task at hand requires a comprehensive and detailed response
that meets the expected criteria for output. In fulfilling this requirement,
it is essential to cover all relevant aspects and provide complete content,
ensuring that the information is clear, concise, and accurately addresses the
needs of the task. Therefore, I will outline the necessary components, adhere
to any guidelines provided, and ensure that the final output is thorough and
well-presented, without omitting important details or summarizing the information.
This approach will guarantee a high-quality response that satisfies the outlined
expectations.\n\n\nTry again, making sure to address the validation error.\n\nBegin!
This is VERY important to you, use the tools available and give your best Final
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '1619'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA4xWTW8cNwy9+1cQczTWi83GjhPf3CQFjKJI27hoizowuBJnhrZGUkVq15sg/72Q
ZvbDaQ69GOshRb1HPpL6cgLQsG2uoDE9qhmiO/vhg/56YR5uPy4uzfvFm5//ePfnb8vPf70y4Z+f
PjSzciKsHsjo7tTchCE6Ug5+NJtEqFSivrg8v1heLl8tLqphCJZcOdZFPTsPZwN7Plsuludni8uz
F6+n031gQ9Jcwd8nAABf6t+C01t6aq5gMdt9GUgEO2qu9k4ATQqufGlQhEXRazM7GE3wSr5CvwEf
NmDQQ8drAoSuwAb0sqEEcOd/ZI8Oruv/V3DbEyjKI6BCj96CJ0MirKgkgKB9SCF3PRSbJUV2ZCGR
xOCFQAOQl5wIarYYvSHYsPagPQE9RTJKFkxipcQIbUgQssas83p1TDxg2sKY+gKYpQSNKazZEvig
8JBFAUHyUF1XWWvwqTzlRyUPiVypTzmuE6sZoLWp8PEdoHPFh9boFVAKNKm00PaUiocGQL/dXW6h
y2zJsSeZw52/87cB0PRMawLtWWb1njY4FzbleEEUPHkV2LBzsCJgb1wuodhX513irmq8F3M4Pb3x
moLNpgjt9PQK3jrC5LZgqWVP9VTMKQYhCO2eWQUesrqdj3DnuWVTKxBaKMDXIy2syBL15KVqotZ+
fueh6GFZQLzbVfbao9sKS0Hy/gmHEp7Q9FPC9hBCZLMXh9vOCtOQYkio5coWjcoMLCrOKtR94g0K
gWi2TAIhAT1hqWOtuuQYQ9JaKSkKHEoy5/C+AvCdIxiKGFZVWi6ksdo7sSC02TnI3lIqLWILkgmw
5KowGFC1cr/zLwvxtw4T67Zi/CWRYZnK8LsQmFKKaorVRODQdxm7kvLKPExiPG4IE/yatsC+DWnA
Ulagth3l7bZzuF4HtoDDiru8u3pqIu1Rx3THwF5LM2BSNnlUNntA2OB29GMBQtmOV071tZXZeWUW
ivZ2dD7W7uHPo1geaQst1wSNLcBeuOtVwCbceGhTGKrjvuPHfI9sjjUwg0SVqakN1BPwUGq4k2H9
cpSJmEjIFzL1WpWdMgwdN26lcVFoXFvL5SQ6+I0k5GSoivOmBYzRscGVo9mu0SBRS4m8GcXl2D9W
ZbU5aU8JEmFVxYB1HjkZU1nG5V5FhwvrZHnSEmlKELCfUI5CErAsJovQmPnbURZ1PIBoykZzUSnG
mAKafgY342zoMib0SkW1B/EULAORSr1iam87zdGaQIFNz44mtGN3Jy6Bd6Ua+60PjkXZwJpp83xw
jIN+Djc6QimNXSeHc1vAKs6YaM0hC0TWFp0TkFxnAKyxy0dtMGrn21FcB2Oi1XbU9YhSUFnKXAip
ig/dbhMcb7JEbRYs29Rn544M6H2YUlB26KfJ8nW/NV3oYgor+eZo07Jn6e8ToQRfNqRoiE21fj0B
+FS3c362cJuYwhD1XsMj1euWb5ZjvObwKDhYXy5eT1YNiu5geLWYlvrzgPdjqeRowTcGTU/2cPTw
GsBsORwZTo5o/xfO92KP1Nl3/yf8wWAMRSV7HxNZNs8pH9wSPdQd/323fZor4EYordnQvTKlUgpL
LWY3PmUa2YrScN+y7yjFxON7po337aW9eIWvcWmak68n/wIAAP//AwA0/RJL3QkAAA==
headers:
CF-RAY:
- 93402216690b0110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:56:49 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '4451'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999631'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_8383a16d5f5b7f53d659bebf481ba936
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nBegin! This is
VERY important to you, use the tools available and give your best Final Answer,
your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '807'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA4xU227bRhB911cM+CwJtnyN3xygBoI+xG1doGgTCKPdITnRcpbdGUoRAgP9jf5e
v6TYpWTJjR8KEATIs3PmdvZ8mwBU7Ks7qFyL5ro+zN5/tJ/urz7cPjziw68ff3y6vXz8vb1/fLyw
n29/q6Y5Iq6+kLND1NzFrg9kHGWEXSI0yqznN5dXi5vF9flZAbroKeSwprfZZZx1LDxbnC0uZ2c3
s/PbfXQb2ZFWd/DHBADgW3nnOsXT1+oOClf505EqNlTdvRwCqFIM+U+FqqyGYtX0CLooRlJK/wAS
t+BQoOENAUKTywYU3VIC+CQPLBjgvnzfwROpgaGuFTAReFJuhDxYBNpgGNAItCfHNTvQNYegEBOs
JW4D+YaABRBy/hRDIA8kG05ROhKbw1NL4BIbJUaoYzpwsjSgg3OkCiwuDJ7ABUxsO4g1sKilweXR
6xQwcFP4YMvWQiBMkgnGdfGG8hnxYC1BoA2FTEHSYEMlylraAQV2bFCn2EGPydhxj2I6h1/23WEI
uykg2GEioG0cgn8pEMHl3CWXi+JYCfoUuz6nQIM4WGAhhW3+YgX62pMz8mPWXN7rzPfec+5xzMx2
SNhiWdyWQph5qjnvQ11Muek0rBK7MSGqUn4K9Z8Dhv34EmkfJQMrVPIQBfpEnoxSV8gOK5nD+x2Q
6FCoC2emOk6AFRIF2qDY2HaLIZA0LE0peMshANX1uIewg45Qh0TfNfvPX3+Dwx5XHNiYtLD1KW7Y
ExRNrELWknLTWtaExczBCQbxlLLgfdl5Xah1KLuHDs0ozeGHQwm50R4T5rFOSyM5qiNro9/LpCby
K3Rr6Mi1KKzdKP08SjHGAGPuF4HGGlB2x6lMYdtyIMCgETpkMeQiSJRT9WeN+MHloiweVZtLaFLc
Wjs/vcGJ6kExu4gMIZwAKBKt9FO84/MeeX5xixCbPsWV/ie0qllY22Ui1CjZGdRiXxX0eQLwubjS
8MpoqlHOS4trKunOry5Gvupohqfo9R61aBiOwMXZu+kbhEtPhhz0xNgqh64lfww9uiAOnuMJMDlp
+/ty3uIeW2dp/g/9EXCOeiO/zHeG3euWj8cSfSmX++1jL2MuBVdKacOOlsaU8io81TiE0cIr3alR
t6xZGkp94tHH635Z3/ira7zFhasmz5N/AQAA//8DAAhvMU7VBgAA
headers:
CF-RAY:
- 93402233baf00110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:56:56 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '6058'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999832'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_f5273114a4a797fd0928674edb442194
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Test task\n\nThis
is the expected criteria for your final answer: Output\nyou MUST return the
actual complete content as the final answer, not a summary.\n\nThis is the context
you''re working with:\n### Previous attempt failed validation: bad result\n\n\n###
Previous result:\nTest tasks are designed to evaluate specific skills or knowledge
in a controlled environment. The criteria for evaluating success include clarity
of instructions, alignment with learning objectives, and the level of engagement
they elicit from participants. Specifically, a test task should include a clear
and concise prompt that outlines what is expected from the participants. Additionally,
it should have a well-defined scoring rubric that assesses the quality of responses
based on predetermined criteria. By ensuring that the test task is relevant
and challenging, it will effectively measure the participants\u2019 capabilities
and provide valuable insights into their understanding of the subject matter.
Effective preparation, testing methods, and feedback mechanisms are essential
to the success of any test task, while also maintaining an environment conducive
to learning and growth.\n\n\nTry again, making sure to address the validation
error.\n\nBegin! This is VERY important to you, use the tools available and
give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
"gpt-4o-mini", "stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '1887'
content-type:
- application/json
cookie:
- __cf_bm=9.xrptc4Zx5NtXl.2MzDRi3N1u8YVt6tNHmSwFyx94A-1745272605-1.0.1.1-v3SFlNedUJ2GFxpW0cts207UyNYzhzfJLBW4o_D8D1t15aRi1Bfh8TEkoVN8JQQdIgDqze4xz4.o3yDgegWJrUGzKroLzXP0VeCDkmLibTc;
_cfuvid=e_MIZNumotQmvbprZ3okpLcxs_RLI2Yb_jiAh0fYHT8-1745272605039-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.12
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFfbbhtHEn3XVxT4kl2DJGTZlmy9yV4nCBCs5ayxXmAdCMXumpkOe7rG
fSFFBAbyG/t7+yWLqh7OUIoC7AsBTnfX5dSpU92/nQEsnF1cw8J0mE0/+NXbD/njD+fv/ee/X92/
+9fN839+/Pj57e35hx8ub9+8WSzlBG9+JZOPp9aG+8FTdhzqsomEmcTq86uXry6uLi6fX+pCz5a8
HGuHvHrJq94Ft7o4v3i5Or9aPX89nu7YGUqLa/j3GQDAb/orcQZL94trOF8ev/SUEra0uJ42ASwi
e/mywJRcyhjyYjkvGg6Zgob+IwTeg8EArdsRILQSNmBIe4oAX8L3LqCHG/1/DZ8oZciYtgkwEqQc
i8klkgVMiVLqKeQEaSDjGmfQ+wNYSq4NZCEz0A59wSx+BozZGTdgyN8lSFvnfVrCNvDek21pCRgs
CKaUKRhHCfYudy4AgoQf2Xuy0ETsac9xu4ZPHSUaY0sUdwR98dkNnmAoceBEaQkuGF+sCy3kjsBZ
ClkDlbIBN5IQhTZ3Sd3vCbeBkp6U/T1hKpEkSdnsCWMQW1yy4V52ySnZOUTeuTQaRSPmceMJGiK7
QbNdw5fwJXxiqCwBDEBNQyZLEfIR5CW4DC6BhBCyQy8YGg7JWYqQaEcRPWzpAOQ1qnQtZp+v4dmz
dxKdxvOOg3GJ4MdQy+U4pGfPrgUwcCffoC8pw0ariq7tcsNxj9GqEcJ0EO8lWIpCKLuG27mECVLH
xVstINA9muwPsO+wxn8/kMlkBYzcUb/UWkolJUjlSOOEIrWl3I60nIJiP+SjafVWvMAlEAtAoHzY
4wGyuCLvehcwk5TvANhvXFtcPiyBQiqxll327SgeOJDExkGtJewJBmxpLQheCII33rVBa63R/nSs
9odjkBOKU8GOoVoXSREwHCNpyGpDPM2smewApYwb71InlOZYKXSCrsDhEuAUkeZDqaYju+f2k6Qi
edphyFq6nlD8NcUvAb3nvTgXLxgAjSlRwntEbYVEexI4zl1Z/UnnO1EQW7t6Q1NjW4XvhcD3PrTY
qkGB6Qb25P1qFoPHkAXOwMEfxkxOfG5KBvSJgdTiU8CIfm0I0HSOdmK9i1zaTqOMWLvq2CJLiIR+
teconBoGP7b/2L07jI4sfC2UVBQajj3mBH9JxXSAaVKVVZXopSQQj4q5FLgGdWnQg6WepcOqg7+u
YQZFY6bQYTAEPWe3qyKkuuO8d6EV6RF8p4QzgZCHUvYHBfqlAP1ZgP3b2EH/MKw8/7lsojNKUJ6Z
5g+zBEuJvxb0Lh+k4pHSwEGlDiGNVqJaOZZoQ2BpR54HsiPwDzdwyd6FatpElyk6VKaNTh2HxxLs
xWCSAAaKirXgMQmGHkYFXr4cqSnEqRIxuZljzBFDGjDSyH/DfV+CVLnS9ZQ/KiB2p04zQ1uc1fCd
VJEGrJVTrF8J1j/XxjJUlbVD7ym09KdCsCEwGKkpMglNxCZPLTP1aOY/9Pt3ikB0FAzVUTQSVWZI
5lMGJ0MBo+O0hhtrnU4a7w86OsYghkipgiFmIg/RCQEUekHUHNMY1TEYLhFbSoquElkm71ZqJrEM
kTee+lViv5NvIm1cMvCO4r4j3x+L+6BTBcNLFYZp0N3OEKvh78fpKHDeRidCqPPOyoQabZ4MR7T0
Val8UqoT4HUGW/qTQdWhXHeMGXssUuISjdCf7qkfPMZREHgYOOYSXJZLiPCntjet4ZZTXtVYDB8n
6W6e8iLFJhYjk1uYfIIHtJH3MgODPZmpLrQnGi2jssbWeCV/OFLzpFXEgI4UaCL3Ovd6ufFtqUJ+
pZcBxVBCex92LnI4yvJPKFpSbzc0L0lb7DtnuoeQSz5o5b6aMsmlb8SyYfkvukEhkd6jsKF8GPGj
IEr2dBkaIi8d2nDMR3bTvTA2jckKt9ou1zYYR8A0THvU7vfLibVKUhg4OS3FNG3nhlJY3h5EiDgO
LLypYKfTIfEw8Xk6jQNzGlSUHtD8v7//BwwOuHG+EmaaXnprdZuSK4tqchxaFu9TmJKkDt6j1Eo4
VerGW6YWQTihev+AOyqjj4J5eKPFSJiUi66X/tBsl6p7eisocpmfoxnweLBOKvWht04PZEsdnOin
C/D69JUhspdQXjqheH+ygCFwriNR3je/jCvfpheN51Y0Jj06umiEed2d5MBBXi8p87DQ1W9nAL/o
y6k8eAwt6h3yLvOW1N2LizfV3mJ+sM2rr168GFczZ/TzwuvLi+UTBu8sZXQ+nTy+FgZNR3Y+Or/U
sFjHJwtnJ2n/MZynbNfUXWj/H/PzgjE0ZLJ3QyTrzMOU522RftWp+/S2CWYNeCFPLGfoLjuKUgpL
DRZfn5mLdEiZ+rvGhZbiEF19azbDXXNlX13ia7wwi7NvZ/8DAAD//wMA87a9+nkPAAA=
headers:
CF-RAY:
- 9340225b9bca0110-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 21 Apr 2025 21:57:05 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '9141'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999564'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_0fc29337116c1d19a0543dfe5b0db291
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,288 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are Test Agent. Test Backstory\nYour
personal goal is: Test Goal\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"}, {"role": "user", "content": "\nCurrent Task: Gather information
about available books on the First World War\n\nThis is the expected criteria
for your final answer: A list of available books on the First World War\nyou
MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
This is VERY important to you, use the tools available and give your best Final
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '903'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.9
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFdhb9w2Ev3uXzHQlwCBbewmThzvN1+Q2G3PQM+X1mjPhTFLjqSpKY46
pHazV+S/H0hK2t00Be6LYZEi9d68x8fZP08AKrbVCirTYjRd787+cU/3P7+9tfTPq+1PTffDtr7+
tf759+HG9tdSnaYVsv6dTJxWnRvpekeRxZdpo4SR0q7Ly4s3V68Xi+VlnujEkkvLmj6eXchZx57P
Xi1eXZwtLs+W78bVrbChUK3gPycAAH/mvwmnt/S5WsHidBrpKARsqFrNLwFUKi6NVBgCh4g+Vqf7
SSM+ks/QvwMvWzDooeENAUKTYAP6sCUFePQf2aOD6/y8gltSAg6AkOgqteRDWuc4RJAacIPscO0I
1iLPAcRDbAk+soYID6LOwgPq6tE/+uU5vHz5WH1qCf7tiPotumfSsIJb2cKHQaUneCAfIUpaA+xh
ebW8eKxgvYP3rXKI0rek8N6hPr98mcACwKeWQ/44WHIbCsA+SgZRBPoMNZooGiC2GMGRhXFehrhW
wufEIz1vUU9B6pqUfQPsAzdtPNivF8eRDTpAbyGIYXRgdx47NiFtQqhuB68WsQVDPg66G3mdJ/6v
Cv/rsSw/eSueVpDrEUV3E4ybLMhDApP4J7TLq+W7Uoeb8+/P4Y52pHMB8tOLAPS5xyKOR1WM6T8j
G8rMCchH1kwSthxbQKjFDFmxtcQWOnYcUXcQomKkhilknmltO3To0wdImbyhAOTtoGQTpCDOcvpK
etvwhh2jD4COnwvx1yNx5+BfA1OcTPJAIZJ6+KjiY6H3Qdm0cIfKCPfUof4x0Ez0GnrhxqOP4GVD
rghqsI+DUiGpFNhliAW74tDhAfAM+IY00Zlx2yELPrsVvjuFNQayE1AcYiupxLI9qkJmd7G39Veu
L5S+l9bDD0QN+plJeYReZcM21RksReTkzVmGNvl974pszpowDsfuTPJhjI7C6YFyp7N0yQDoHHDX
o8mlb5ysk4frGlkLhzeFw42IXe8oOS6J9anFUZZ7WZNGuFHcUDg+eThEWbM0in27AyUjg4/huGyH
xsHEdiz9VPm/Hj6EnjSIH8+a0fHgpdGeTPb2KI4RXzs2ce/WXkI8Sz4nxUzvbaH3HiOGqClE8sla
TalzI8k+stfsDj/DLYbIvtnTnQbAtCqejRs9RxtKjB2hTdCHPm+1N9OMq8TDrFaR/ID/i5Q1HFOq
7NUqELNdDVPk0XWXe9eljySp4IO3ZOFHQjPGyr2gHePjYiKmDSpFuENzx84deHIeSXI5mU7UcegV
U+bHo1RNF4zyeojfTtcsLnV9i4H/OzG2ZDiw+AAdWkroUglzkBiVEA6z893fHrIVXMP7chMT3JYz
M3ON7OGGXTLvsWvNtGI6ZRGfKYCOAGKrMjTtQW7Grw6iSgccAxgcQjFPesI6pmyJ7SkMofh4y5ZA
0TeUNggy6BQcV4VTcWJi9guhlmDM5LKchcqPODi4xe7Ai12Jb5qv3J43EtHBLu0idbk7jq1XPGZp
gyFialxSTKDbZUkSfie+OXPF50B1TSbO+28TqAx8uSjIk/FuRQON9WbTIjm4E+0HbWQGO5a8ZWeV
/IswpTc5Vzw2Bx1Cm/abrHYUHF9nRctN61IGTqNr8RbWFLdEHtBzh65kQr69AmDHNsSSGS1KEeED
mnZUNsw9TEqhAIPnPwY6jJyyXb6iyuENgGsZ4rdanlPo8HmE1sEG3ZC7JKXRAlCLAvqdeEodBmm6
DG1qemJiPqmZ+EseHp16ftjXKdVDwNRb+sG5gwn0XorEuaP8bZz5MveQTppeZR2+WlrV7Dm0T0oY
xKd+MTVdVZ79cgLwW+5Vh6P2s+pVuj4+RXmm/Lnl5dirVvsWeT/75vXFOBsTw/3E5WKaONrwaSz1
QbtbGTQt2f3SfW+Mg2U5mDg5oP1XON/au1Bn3/w/2+8njKE+kn3qlSybY8r715TST4i/e20ucwZc
BdING3qKTJqksFTj4EpjX4VdiNQ91ewb0l65dPd1/2TXaPDtwtaL6uTLyf8AAAD//wMA10Lu/OsM
AAA=
headers:
CF-RAY:
- 937ed42dee2e621f-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 29 Apr 2025 12:33:48 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=mLRCnpdB3n_6medIZWHnUu8MNRGZsD6riaRhN47PK74-1745930028-1.0.1.1-M2lDM1_V9hNCK0MZrBnFalF3lndC3JkS8zhDOGww_LmOrgdpU9fZLpNZUmyinCQOnlCjDjDYJUECM82ffT1anqBiO1NoDeNp91EPKiK7s.8;
path=/; expires=Tue, 29-Apr-25 13:03:48 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=eTrj_ZhCx2XuylS5vYROwUlPrJBwOyrbS2Ki.msl45E-1745930028010-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '10856'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999807'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_bc2d62d8325b2bdd3e98544a66389132
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Guardrail Agent. You
are a expert at validating the output of a task. By providing effective feedback
if the output is not valid.\nYour personal goal is: Validate the output of the
task\n\nTo give my best complete final answer to the task respond using the
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
Your final answer must be the great and the most complete as possible, it must
be outcome described.\n\nI MUST use these formats, my job depends on it!\nIMPORTANT:
Your final answer MUST contain all the information requested in the following
format: {\n \"valid\": bool,\n \"feedback\": str | None\n}\n\nIMPORTANT: Ensure
the final output does not include any code block markers like ```json or ```python."},
{"role": "user", "content": "\n Ensure the following task result complies
with the given guardrail.\n\n Task result:\n Here is a comprehensive
list of available books on the First World War:\n\n1. **\"The Sleepwalkers:
How Europe Went to War in 1914\" by Christopher Clark** \n This book delves
into the complex factors that led to the outbreak of the war, offering insights
into the political and social dynamics of early 20th century Europe.\n\n2. **\"A
World Undone: The Story of the Great War, 1914 to 1918\" by G.J. Meyer** \n Meyer''s
expansive narrative covers the entire war with a focus on both military strategies
and the human experiences endured by soldiers and civilians alike.\n\n3. **\"All
Quiet on the Western Front\" by Erich Maria Remarque** \n A poignant novel
that captures the resilience and trauma experienced by German soldiers during
World War I, based on the author''s own experiences.\n\n4. **\"The First World
War\" by John Keegan** \n Keegan provides a detailed military history of
the war, featuring insights on battles, strategies, and the overall impact on
global affairs.\n\n5. **\"Goodbye to All That\" by Robert Graves** \n This
autobiography recounts the author''s experiences as a soldier during the war,
offering a personal and critical perspective on the conflicts and the post-war
era.\n\n6. **\"Catastrophe 1914: Europe Goes to War\" by Max Hastings** \n Hastings
chronicles the events leading up to World War I and the early battles, detailing
the war''s initial impact on European societies.\n\n7. **\"The War That Ended
Peace: The Road to 1914\" by Margaret MacMillan** \n MacMillan explores the
political and historical factors that contributed to the outbreak of war, emphasizing
the decisions made by leaders across Europe.\n\n8. **\"The First World War:
A Complete History\" by Martin Gilbert** \n This complete history takes readers
through the entirety of the war, from its causes to its aftermath, using a wide
range of sources.\n\n9. **\"1914: The Year the World Ended\" by Paul Ham** \n Ham
focuses on the pivotal year of 1914 and the early war''s devastation, analyzing
its long-lasting effects on the world.\n\n10. **\"War Horse\" by Michael Morpurgo** \n This
children''s novel tells the story of a horse and his experiences during the
war, highlighting the bond between animals and humans amidst the chaos.\n\nEach
of these books offers unique perspectives and rich details about the First World
War, making them valuable resources for anyone interested in this pivotal period
in history.\n\n Guardrail:\n Ensure the authors are from Italy\n \n Your
task:\n - Confirm if the Task result complies with the guardrail.\n -
If not, provide clear feedback explaining what is wrong (e.g., by how much it
violates the rule, or what specific part fails).\n - Focus only on identifying
issues \u2014 do not propose corrections.\n - If the Task result complies
with the guardrail, saying that is valid\n "}], "model": "gpt-4o-mini",
"stop": ["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '3917'
content-type:
- application/json
cookie:
- __cf_bm=mLRCnpdB3n_6medIZWHnUu8MNRGZsD6riaRhN47PK74-1745930028-1.0.1.1-M2lDM1_V9hNCK0MZrBnFalF3lndC3JkS8zhDOGww_LmOrgdpU9fZLpNZUmyinCQOnlCjDjDYJUECM82ffT1anqBiO1NoDeNp91EPKiK7s.8;
_cfuvid=eTrj_ZhCx2XuylS5vYROwUlPrJBwOyrbS2Ki.msl45E-1745930028010-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.9
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFPLbtswELz7KxY8y4HsJHWsW9wiQVq0hzx6aBUIa3IlMaFIlVw5NQL/
e0Epiew+gF4EamZnuC8+TwCEViIDIWtk2bRmurqm29X1+/PNzYfL06+fFV8svz2sttuF+XHzUSRR
4dYPJPlVdSRd0xpi7exAS0/IFF1ni5PT5XE6m896onGKTJRVLU9P3LTRVk/n6fxkmi6ms7MXde20
pCAy+D4BAHjuvzFPq+inyCBNXpGGQsCKRPYWBCC8MxERGIIOjJZFMpLSWSbbp35bu66qOYMrsO4J
JFqo9IYAoYr5A9rwRB4gtxfaooHz/j+D59wC5GKDRqtcZFCiCZQMYEmk1igfI56LL84SuBK4JsCO
a+cDGB2YFGjbo4zhETyFzjCgJyi9a+CK0WyP4NyYA2VDNraY1BjpuCYP0nWWvaaQQOhkDRjgknyD
dpv0BnefEkCrhvPNUS5yu9vviaeyCxjnYjtj9gi01jHGS/tp3L8wu7f+G1e13q3Db1JRaqtDXXjC
4GzsdWDXip7dTQDu+zl3B6MTrXdNywW7R+qvWywXg58Y12tk370sgWDHaEb87PRVdeBXKGLUJuxt
ipAoa1KjdFwr7JR2e8Rkr+o/s/mb91C5ttX/2I+ElNQyqaL1pLQ8rHgM8xRf37/C3rrcJywC+Y2W
VLAmHyehqMTODG9ChG1gaopS24p86/XwMMq2SI+X87P5PF2mYrKb/AIAAP//AwD77a3iJgQAAA==
headers:
CF-RAY:
- 937ed6bd68faa435-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 29 Apr 2025 12:35:23 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '1138'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999072'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_2ba1be014a5974ba354aff564e26516a
status:
code: 200
message: OK
version: 1

File diff suppressed because it is too large

View File

@@ -0,0 +1,250 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are Guardrail Agent. You
are a expert at validating the output of a task. By providing effective feedback
if the output is not valid.\nYour personal goal is: Validate the output of the
task\n\nTo give my best complete final answer to the task respond using the
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
Your final answer must be the great and the most complete as possible, it must
be outcome described.\n\nI MUST use these formats, my job depends on it!\nIMPORTANT:
Your final answer MUST contain all the information requested in the following
format: {\n \"valid\": bool,\n \"feedback\": str | None\n}\n\nIMPORTANT: Ensure
the final output does not include any code block markers like ```json or ```python."},
{"role": "user", "content": "\n Ensure the following task result complies
with the given guardrail.\n\n Task result:\n \n Lorem Ipsum
is simply dummy text of the printing and typesetting industry. Lorem Ipsum has
been the industry''s standard dummy text ever\n \n\n Guardrail:\n Ensure
the result has less than 10 words\n \n Your task:\n - Confirm
if the Task result complies with the guardrail.\n - If not, provide clear
feedback explaining what is wrong (e.g., by how much it violates the rule, or
what specific part fails).\n - Focus only on identifying issues \u2014
do not propose corrections.\n - If the Task result complies with the
guardrail, saying that is valid\n "}], "model": "gpt-4o-mini", "stop":
["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '1629'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.9
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFPLbtswELz7KxY824GkxLGtW4KiQB+XBmkRtAqENbmSmFAkQVJ2UsP/
HlByLKdNgV4IcGdnOPvgbgLApGA5MN5g4K1Vs+ub718rm324+5z+/CLt1dXD5fU3s1jd3Wx//GbT
yDDrB+LhlXXGTWsVBWn0AHNHGCiqpouL+XKVZum8B1ojSEVabcPswsxaqeUsS7KLWbKYpcsDuzGS
k2c5/JoAAOz6M/rUgp5YDsn0NdKS91gTy49JAMwZFSMMvZc+oA5sOoLc6EC6t37bmK5uQg6fQJst
cNRQyw0BQh39A2q/JQdQ6I9So4Kr/p7DrtAABdugkqJgOVSoPE2HYEUk1sgfY7xgtw1BQP8Ijnyn
AsTHUWoP6SVsjRN+CvTEiYTUNYSGoO7QCYdSgZKtDGAqqCiaCA1qSJOBBetnOAicFazQ+9MCHVWd
x9hk3Sl1AqDWJmAcUt/a+wOyPzZTmdo6s/Z/UFkltfRN6Qi90bFxPhjLenQ/Abjvh9a9mQOzzrQ2
lME8Uv/ceTIf9Ni4KyM6Tw9gMAHVCWt+OX1HrxQUUCp/MnbGkTckRuq4I9gJaU6AyUnVf7t5T3uo
XOr6f+RHgHOygURpHQnJ31Y8pjmKX+lfaccu94aZJ7eRnMogycVJCKqwU8OCM//sA7VlJXVNzjo5
bHlly+R8lS2zLFklbLKfvAAAAP//AwCHe/Jh8wMAAA==
headers:
CF-RAY:
- 937b20ddf9607def-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 29 Apr 2025 01:46:56 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=nHa2kVJI_yO1RIsmZcEednJ1e9UVy1liv_sjBNtSj7Q-1745891216-1.0.1.1-jUH9kFawVBjnbq8sIL2.MQx.p7JvBZWUhqlkNKRlStWSgQxT0eZMPcgq9TCQoJAjuyNwhqfpK4HuX6x5n8UbQgAb6JrWJEG823e6GpGROEA;
path=/; expires=Tue, 29-Apr-25 02:16:56 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=gg2UeahMCOOR8YhitRtzDwENMOnTOuQdyTMVJVHG0Mg-1745891216085-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '896'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999631'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_859221ed1aedb26cc9d335004ccf183e
status:
code: 200
message: OK
- request:
body: '{"messages": [{"role": "system", "content": "You are Guardrail Agent. You
are a expert at validating the output of a task. By providing effective feedback
if the output is not valid.\nYour personal goal is: Validate the output of the
task\n\nTo give my best complete final answer to the task respond using the
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
Your final answer must be the great and the most complete as possible, it must
be outcome described.\n\nI MUST use these formats, my job depends on it!\nIMPORTANT:
Your final answer MUST contain all the information requested in the following
format: {\n \"valid\": bool,\n \"feedback\": str | None\n}\n\nIMPORTANT: Ensure
the final output does not include any code block markers like ```json or ```python."},
{"role": "user", "content": "\n Ensure the following task result complies
with the given guardrail.\n\n Task result:\n \n Lorem Ipsum
is simply dummy text of the printing and typesetting industry. Lorem Ipsum has
been the industry''s standard dummy text ever\n \n\n Guardrail:\n Ensure
the result has less than 500 words\n \n Your task:\n -
Confirm if the Task result complies with the guardrail.\n - If not, provide
clear feedback explaining what is wrong (e.g., by how much it violates the rule,
or what specific part fails).\n - Focus only on identifying issues \u2014
do not propose corrections.\n - If the Task result complies with the
guardrail, saying that is valid\n "}], "model": "gpt-4o-mini", "stop":
["\nObservation:"]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '1630'
content-type:
- application/json
cookie:
- __cf_bm=nHa2kVJI_yO1RIsmZcEednJ1e9UVy1liv_sjBNtSj7Q-1745891216-1.0.1.1-jUH9kFawVBjnbq8sIL2.MQx.p7JvBZWUhqlkNKRlStWSgQxT0eZMPcgq9TCQoJAjuyNwhqfpK4HuX6x5n8UbQgAb6JrWJEG823e6GpGROEA;
_cfuvid=gg2UeahMCOOR8YhitRtzDwENMOnTOuQdyTMVJVHG0Mg-1745891216085-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.68.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.68.2
x-stainless-raw-response:
- 'true'
x-stainless-read-timeout:
- '600.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.9
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jJJNb9swDIbv/hWEzvHgfHRpfesOG3opsGE7LYXBSLStRZY0iU43BPnv
g5wPu10H7GLAfPhSfEkeMgChlShByBZZdt7kH758e1wzbnfbO6o/f1osV3T/+BO7UNNDIWZJ4bY/
SPJF9U66zhti7ewJy0DIlKrO16ub27v5srgZQOcUmSRrPOcrl3fa6nxRLFZ5sc7nt2d167SkKEr4
ngEAHIZv6tMq+iVKKGaXSEcxYkOivCYBiOBMigiMUUdGy2I2Qukskx1a/9q6vmm5hAew7hkkWmj0
ngChSf0D2vhMAWBjP2qLBu6H/xIOGwuwEXs0Wm1ECRx6mp1iNZHaotylsO2N2djj9PFAdR/RnOEE
oLWOMQ1wsP10JserUeMaH9w2vpKKWlsd2yoQRmeTqcjOi4EeM4CnYaD9ixkJH1znuWK3o+G583KG
4Vz2ONLF7RmyYzQT1XI5e6NepYhRmzhZiZAoW1KjdNwf9kq7Ccgmrv/u5q3aJ+faNv9TfgRSkmdS
lQ+ktHzpeEwLlM78X2nXKQ8Ni0hhryVVrCmkTSiqsTen4xPxd2TqqlrbhoIP+nSBta/SueD7QtWF
yI7ZHwAAAP//AwAiLXhqjwMAAA==
headers:
CF-RAY:
- 937b2311ee091b1b-GRU
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 29 Apr 2025 01:48:26 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '610'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999631'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_c136835c16be6bc1e4d820f239c4b620
status:
code: 200
message: OK
version: 1

View File

@@ -6,7 +6,7 @@ research_task:
expected_output: >
A list with 10 bullet points of the most relevant information about {topic}
agent: researcher
guardrail: make sure each bullet contains a minimum of 100 words
guardrail: ensure each bullet contains its source
reporting_task:
description: >

View File

@@ -143,31 +143,17 @@ def test_agent_function_calling_llm():
), "agent's function_calling_llm is incorrect"
# VCR could not record the request to localhost from Docker to get it version, so we need to mock the tool.
# TODO: We can remove this mock after some issue such as https://github.com/kevin1024/vcrpy/issues/519 been addressed
@pytest.fixture
def code_interpreter_tool_mock():
with patch(
"crewai_tools.tools.code_interpreter_tool.code_interpreter_tool.CodeInterpreterTool._run",
return_value="(True, 'good result')",
):
yield
def test_task_guardrail(code_interpreter_tool_mock):
def test_task_guardrail():
crew = InternalCrew()
research_task = crew.research_task()
assert (
research_task.guardrail
== "make sure each bullet contains a minimum of 100 words"
)
assert research_task.guardrail == "ensure each bullet contains its source"
reporting_task = crew.reporting_task()
assert reporting_task.guardrail is None
@pytest.mark.vcr(filter_headers=["authorization"])
def test_before_kickoff_modification(code_interpreter_tool_mock):
def test_before_kickoff_modification():
crew = InternalCrew()
inputs = {"topic": "LLMs"}
result = crew.crew().kickoff(inputs=inputs)
@@ -175,7 +161,7 @@ def test_before_kickoff_modification(code_interpreter_tool_mock):
@pytest.mark.vcr(filter_headers=["authorization"])
def test_after_kickoff_modification(code_interpreter_tool_mock):
def test_after_kickoff_modification():
crew = InternalCrew()
# Assuming the crew execution returns a dict
result = crew.crew().kickoff({"topic": "LLMs"})
@@ -186,14 +172,14 @@ def test_after_kickoff_modification(code_interpreter_tool_mock):
@pytest.mark.vcr(filter_headers=["authorization"])
def test_before_kickoff_with_none_input(code_interpreter_tool_mock):
def test_before_kickoff_with_none_input():
crew = InternalCrew()
crew.crew().kickoff(None)
# Test should pass without raising exceptions
@pytest.mark.vcr(filter_headers=["authorization"])
def test_multiple_before_after_kickoff(code_interpreter_tool_mock):
def test_multiple_before_after_kickoff():
@CrewBase
class MultipleHooksCrew:
agents: List[BaseAgent]

View File

@@ -139,127 +139,32 @@ def sample_agent():
return Agent(role="Test Agent", goal="Test Goal", backstory="Test Backstory")
@pytest.mark.vcr(filter_headers=["authorization"])
def test_guardrail_using_llm(sample_agent):
task = Task(
description="Test task",
expected_output="Output",
guardrail="Ensure the output is equal to 'good result'",
)
with patch(
"crewai.tasks.task_guardrail.TaskGuardrail.__call__",
side_effect=[(False, "bad result"), (True, "good result")],
) as mock_guardrail:
task.execute_sync(agent=sample_agent)
assert mock_guardrail.call_count == 2
task.guardrail = TaskGuardrail(
description="Ensure the output is equal to 'good result'",
llm=LLM(model="gpt-4o-mini"),
)
with patch(
"crewai.tasks.task_guardrail.TaskGuardrail.__call__",
side_effect=[(False, "bad result"), (True, "good result")],
) as mock_guardrail:
task.execute_sync(agent=sample_agent)
assert mock_guardrail.call_count == 2
@pytest.fixture
def task_output():
return TaskOutput(
raw="Test output",
raw="""
Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever
""",
description="Test task",
expected_output="Output",
agent="Test Agent",
)
def test_task_guardrail_initialization_no_llm(task_output):
"""Test TaskGuardrail initialization fails without LLM"""
with pytest.raises(ValueError, match="Provide a valid LLM to the TaskGuardrail"):
TaskGuardrail(description="Test")(task_output)
@pytest.fixture
def mock_llm():
llm = Mock(spec=LLM)
llm.call.return_value = """
output = 'Sample book data'
if isinstance(output, str):
result = (True, output)
else:
result = (False, 'Invalid output format')
print(result)
"""
return llm
@pytest.mark.parametrize(
"tool_run_output",
[
{
"output": "(True, 'Valid output')",
"expected_result": True,
"expected_output": "Valid output",
},
{
"output": "(False, 'Invalid output format')",
"expected_result": False,
"expected_output": "Invalid output format",
},
{
"output": "Something went wrong while running the code, Invalid output format",
"expected_result": False,
"expected_output": "Something went wrong while running the code, Invalid output format",
},
{
"output": "No result variable found",
"expected_result": False,
"expected_output": "No result variable found",
},
{
"output": (False, "Invalid output format"),
"expected_result": False,
"expected_output": "Invalid output format",
},
{
"output": "bla-bla-bla",
"expected_result": False,
"expected_output": "Error parsing result: malformed node or string on line 1",
},
],
)
@patch("crewai_tools.CodeInterpreterTool.run")
def test_task_guardrail_execute_code(mock_run, mock_llm, tool_run_output, task_output):
mock_run.return_value = tool_run_output["output"]
guardrail = TaskGuardrail(description="Test validation", llm=mock_llm)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_task_guardrail_process_output(task_output):
guardrail = TaskGuardrail(description="Ensure the result has less than 10 words")
result = guardrail(task_output)
assert result[0] == tool_run_output["expected_result"]
assert tool_run_output["expected_output"] in result[1]
assert result[0] is False
assert "exceeding the guardrail limit of fewer than" in result[1].lower()
@patch("crewai_tools.CodeInterpreterTool.run")
def test_guardrail_using_additional_instructions(mock_run, mock_llm, task_output):
mock_run.return_value = "(True, 'Valid output')"
additional_instructions = (
"This is an additional instruction created by the user follow it strictly"
)
guardrail = TaskGuardrail(
description="Test validation",
llm=mock_llm,
additional_instructions=additional_instructions,
)
guardrail = TaskGuardrail(description="Ensure the result has less than 500 words")
guardrail(task_output)
assert additional_instructions in str(mock_llm.call.call_args)
result = guardrail(task_output)
assert result[0] is True
assert result[1] == task_output.raw
@pytest.mark.vcr(filter_headers=["authorization"])
@@ -287,26 +192,13 @@ def test_guardrail_emits_events(sample_agent):
)
task = Task(
description="Test task",
expected_output="Output",
guardrail="Ensure the output is equal to 'good result'",
description="Gather information about available books on the First World War",
agent=sample_agent,
expected_output="A list of available books on the First World War",
guardrail="Ensure the authors are from Italy",
)
with (
patch(
"crewai_tools.CodeInterpreterTool.run",
side_effect=[
"Something went wrong while running the code",
(True, "good result"),
],
),
patch(
"crewai.tasks.task_guardrail.TaskGuardrail.generate_code",
return_value="""def guardrail(result: TaskOutput):
return (True, result.raw.upper())""",
),
):
task.execute_sync(agent=sample_agent)
result = task.execute_sync(agent=sample_agent)
def custom_guardrail(result: TaskOutput):
return (True, "good result from callable function")
@@ -320,35 +212,26 @@ def test_guardrail_emits_events(sample_agent):
task.execute_sync(agent=sample_agent)
expected_started_events = [
{
"guardrail": """def guardrail(result: TaskOutput):
return (True, result.raw.upper())""",
"retry_count": 0,
},
{
"guardrail": """def guardrail(result: TaskOutput):
return (True, result.raw.upper())""",
"retry_count": 1,
},
{"guardrail": "Ensure the authors are from Italy", "retry_count": 0},
{"guardrail": "Ensure the authors are from Italy", "retry_count": 1},
{
"guardrail": """def custom_guardrail(result: TaskOutput):
return (True, "good result from callable function")""",
"retry_count": 0,
},
]
expected_completed_events = [
{
"success": False,
"result": None,
"error": "Something went wrong while running the code",
"error": "The task result does not comply with the guardrail because none of "
"the listed authors are from Italy. All authors mentioned are from "
"different countries, including Germany, the UK, the USA, and others, "
"which violates the requirement that authors must be Italian.",
"retry_count": 0,
},
{
"success": True,
"result": "good result",
"error": None,
"retry_count": 1,
},
{"success": True, "result": result.raw, "error": None, "retry_count": 1},
{
"success": True,
"result": "good result from callable function",
@@ -360,20 +243,23 @@ def test_guardrail_emits_events(sample_agent):
assert completed_guardrail == expected_completed_events
@pytest.mark.parametrize("unsafe_mode", [True, False])
def test_task_guardrail_force_code_tool_unsafe_mode(mock_llm, task_output, unsafe_mode):
guardrail = TaskGuardrail(
description="Test validation", llm=mock_llm, unsafe_mode=unsafe_mode
)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_guardrail_when_an_error_occurs(sample_agent, task_output):
with (
patch(
"crewai_tools.CodeInterpreterTool.__init__", return_value=None
) as mock_init,
patch(
"crewai_tools.CodeInterpreterTool.run", return_value=(True, "Valid output")
"crewai.Agent.kickoff",
side_effect=Exception("Unexpected error"),
),
pytest.raises(
Exception,
match="Error while validating the task output: Unexpected error",
),
):
result = guardrail(task_output)
mock_init.assert_called_once_with(code=ANY, unsafe_mode=unsafe_mode)
assert result == (True, "Valid output")
task = Task(
description="Gather information about available books on the First World War",
agent=sample_agent,
expected_output="A list of available books on the First World War",
guardrail="Ensure the authors are from Italy",
max_retries=0,
)
task.execute_sync(agent=sample_agent)